From 2f2cdd5618a8de4c0b9cef4d0f2c45f570daab5a Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 12 Apr 2024 09:30:39 +0200 Subject: [PATCH 001/109] Initial rework of the uarch of the CCU FSM * The original FSM is now split across three modules: decoder, memory_unit and snoop_unit. * The top level design is now identified as ccu_ctrl * Legacy parameter is used to simulate a blocking behavior --- Bender.yml | 7 +- src/ace_ccu_top.sv | 18 +- src/ccu_ctrl.sv | 256 +++++++++++++ src/ccu_ctrl_decoder.sv | 360 ++++++++++++++++++ src/ccu_ctrl_memory_unit.sv | 342 +++++++++++++++++ src/ccu_ctrl_pkg.sv | 18 + src/ccu_ctrl_snoop_unit.sv | 265 ++++++++++++++ src/ccu_fsm.sv | 705 ------------------------------------ 8 files changed, 1263 insertions(+), 708 deletions(-) create mode 100644 src/ccu_ctrl.sv create mode 100644 src/ccu_ctrl_decoder.sv create mode 100644 src/ccu_ctrl_memory_unit.sv create mode 100644 src/ccu_ctrl_pkg.sv create mode 100644 src/ccu_ctrl_snoop_unit.sv delete mode 100644 src/ccu_fsm.sv diff --git a/Bender.yml b/Bender.yml index 8a0199c..2fcd1ce 100644 --- a/Bender.yml +++ b/Bender.yml @@ -19,10 +19,15 @@ sources: # Level 1 - src/ace_intf.sv - src/snoop_intf.sv + - src/ccu_ctrl_pkg.sv # Level 2 - src/ace_trs_dec.sv - - src/ccu_fsm.sv + - src/ccu_ctrl_decoder.sv + - src/ccu_ctrl_memory_unit.sv + - src/ccu_ctrl_snoop_unit.sv # Level 3 + - src/ccu_ctrl.sv + # Level 4 - src/ace_ccu_top.sv - target: simulation diff --git a/src/ace_ccu_top.sv b/src/ace_ccu_top.sv index 8ba1fff..5afc120 100644 --- a/src/ace_ccu_top.sv +++ b/src/ace_ccu_top.sv @@ -38,6 +38,9 @@ module ace_ccu_top parameter type mst_resp_t = logic, parameter type mst_stg_req_t = logic, parameter type mst_stg_resp_t = logic, + parameter type snoop_ac_t = logic, + parameter type snoop_cr_t = logic, + parameter type snoop_cd_t = logic, parameter type snoop_req_t = logic, parameter type snoop_resp_t = logic @@ -214,17 +217,25 @@ axi_mux #( .mst_resp_i ( ccu_resps_mux_i ) ); -ccu_fsm #( 
+ccu_ctrl #( .DcacheLineWidth ( Cfg.DcacheLineWidth ), .AxiDataWidth ( Cfg.AxiDataWidth ), .NoMstPorts ( Cfg.NoSlvPorts ), .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts ), // ID width of the slave ports + .mst_aw_chan_t ( mst_stg_aw_chan_t ), // AW Channel Type, master port + .w_chan_t ( w_chan_t ), // W Channel Type, all ports + .mst_b_chan_t ( mst_stg_b_chan_t ), // B Channel Type, master port + .mst_ar_chan_t ( mst_stg_ar_chan_t ), // AR Channel Type, master port + .mst_r_chan_t ( mst_stg_r_chan_t ), // R Channel Type, master port .mst_req_t ( mst_stg_req_t ), .mst_resp_t ( mst_stg_resp_t ), + .snoop_ac_t ( snoop_ac_t ), + .snoop_cr_t ( snoop_cr_t ), + .snoop_cd_t ( snoop_cd_t ), .snoop_req_t ( snoop_req_t ), .snoop_resp_t ( snoop_resp_t ) -) fsm ( +) ccu_ctrl_i ( .clk_i, .rst_ni, .ccu_req_i ( ccu_reqs_mux_o ), @@ -337,6 +348,9 @@ module ace_ccu_top_intf .mst_resp_t ( mst_ace_resp_t ), .mst_stg_req_t ( mst_ace_stg_req_t ), .mst_stg_resp_t ( mst_ace_stg_resp_t ), + .snoop_ac_t ( snoop_ac_t ), + .snoop_cr_t ( snoop_cr_t ), + .snoop_cd_t ( snoop_cd_t ), .snoop_req_t ( snoop_req_t ), .snoop_resp_t ( snoop_resp_t ) ) i_ccu_top ( diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv new file mode 100644 index 0000000..b7fcb8f --- /dev/null +++ b/src/ccu_ctrl.sv @@ -0,0 +1,256 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 + +`include "ace/assign.svh" +`include "ace/typedef.svh" + +module ccu_ctrl import ccu_ctrl_pkg::*; +#( + parameter int unsigned DcacheLineWidth = 0, + parameter int unsigned AxiDataWidth = 0, + parameter int unsigned NoMstPorts = 4, + parameter int unsigned SlvAxiIDWidth = 0, + parameter type mst_aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type mst_b_chan_t = logic, + parameter type mst_ar_chan_t = logic, + parameter type mst_r_chan_t = logic, + parameter type mst_req_t = logic, + parameter type mst_resp_t = logic, + parameter type snoop_ac_t = logic, + parameter type snoop_cr_t = logic, + parameter type snoop_cd_t = logic, + parameter type snoop_req_t = logic, + parameter type snoop_resp_t = logic +) ( + //clock and reset + input clk_i, + input rst_ni, + // CCU Request In and response out + input mst_req_t ccu_req_i, + output mst_resp_t ccu_resp_o, + //CCU Request Out and response in + output mst_req_t ccu_req_o, + input mst_resp_t ccu_resp_i, + // Snoop channel request and response + output snoop_req_t [NoMstPorts-1:0] s2m_req_o, + input snoop_resp_t [NoMstPorts-1:0] m2s_resp_i +); + +localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth; +localparam int unsigned MstIdxBits = $clog2(NoMstPorts); + + +mst_resp_t mu_ccu_resp; +mst_req_t mu_ccu_req; + +su_op_e su_op; +mu_op_e mu_op; + +logic su_valid, mu_valid; + +logic su_ready, mu_ready; + +mst_req_t dec_ccu_req_holder; + +logic dec_shared, dec_dirty; + +logic [MstIdxBits-1:0] dec_first_responder; + +logic [NoMstPorts-1:0] su_cd_ready, mu_cd_ready; +logic su_cd_busy, mu_cd_busy; + +mst_r_chan_t su_r; +logic su_r_valid, su_r_ready; + +logic [NoMstPorts-1:0] data_available; + +logic ccu_ar_ready, ccu_aw_ready; + +snoop_req_t [NoMstPorts-1:0] dec_snoop_req; + +snoop_cd_t [NoMstPorts-1:0] cd; +logic [NoMstPorts-1:0] cd_valid; + +for (genvar i = 0; i < NoMstPorts; i++) begin + assign cd[i] = m2s_resp_i[i].cd; + assign cd_valid[i] = 
m2s_resp_i[i].cd_valid; +end + +ccu_ctrl_decoder #( + .DcacheLineWidth (DcacheLineWidth), + .AxiDataWidth (AxiDataWidth), + .NoMstPorts (NoMstPorts), + .SlvAxiIDWidth (SlvAxiIDWidth), + .mst_aw_chan_t (mst_aw_chan_t), + .w_chan_t (w_chan_t), + .mst_b_chan_t (mst_b_chan_t), + .mst_ar_chan_t (mst_ar_chan_t), + .mst_r_chan_t (mst_r_chan_t), + .mst_req_t (mst_req_t), + .mst_resp_t (mst_resp_t), + .snoop_ac_t (snoop_ac_t), + .snoop_cr_t (snoop_cr_t), + .snoop_cd_t (snoop_cd_t), + .snoop_req_t (snoop_req_t), + .snoop_resp_t (snoop_resp_t) +) ccu_ctrl_decoder_i ( + .clk_i, + .rst_ni, + + .ccu_req_i, + + .s2m_req_o (dec_snoop_req), + .m2s_resp_i, + + .slv_aw_ready_o (ccu_aw_ready), + .slv_ar_ready_o (ccu_ar_ready), + + .ccu_req_holder_o (dec_ccu_req_holder), + .su_ready_i (su_ready), + .mu_ready_i (mu_ready), + .su_valid_o (su_valid), + .mu_valid_o (mu_valid), + .su_op_o (su_op), + .mu_op_o (mu_op), + .shared_o (dec_shared), + .dirty_o (dec_dirty), + .data_available_o (data_available), + .first_responder_o (dec_first_responder) +); + +ccu_ctrl_snoop_unit #( + .DcacheLineWidth (DcacheLineWidth), + .AxiDataWidth (AxiDataWidth), + .NoMstPorts (NoMstPorts), + .SlvAxiIDWidth (SlvAxiIDWidth), + .mst_aw_chan_t (mst_aw_chan_t), + .w_chan_t (w_chan_t), + .mst_b_chan_t (mst_b_chan_t), + .mst_ar_chan_t (mst_ar_chan_t), + .mst_r_chan_t (mst_r_chan_t), + .mst_req_t (mst_req_t), + .mst_resp_t (mst_resp_t), + .snoop_ac_t (snoop_ac_t), + .snoop_cr_t (snoop_cr_t), + .snoop_cd_t (snoop_cd_t), + .snoop_req_t (snoop_req_t), + .snoop_resp_t (snoop_resp_t) +) ccu_ctrl_snoop_unit_i ( + .clk_i, + .rst_ni, + .r_o (su_r), + .r_valid_o (su_r_valid), + .r_ready_i (su_r_ready), + .cd_i (cd), + .cd_valid_i (cd_valid), + .cd_ready_o (su_cd_ready), + .cd_busy_o (su_cd_busy), + .ccu_req_holder_i (dec_ccu_req_holder), + .su_ready_o (su_ready), + .su_valid_i (su_valid), + .su_op_i (su_op), + .shared_i (dec_shared), + .dirty_i (dec_dirty), + .data_available_i (data_available), + .first_responder_i 
(dec_first_responder) +); + +ccu_ctrl_memory_unit #( + .DcacheLineWidth (DcacheLineWidth), + .AxiDataWidth (AxiDataWidth), + .NoMstPorts (NoMstPorts), + .SlvAxiIDWidth (SlvAxiIDWidth), + .mst_aw_chan_t (mst_aw_chan_t), + .w_chan_t (w_chan_t), + .mst_b_chan_t (mst_b_chan_t), + .mst_ar_chan_t (mst_ar_chan_t), + .mst_r_chan_t (mst_r_chan_t), + .mst_req_t (mst_req_t), + .mst_resp_t (mst_resp_t), + .snoop_ac_t (snoop_ac_t), + .snoop_cr_t (snoop_cr_t), + .snoop_cd_t (snoop_cd_t), + .snoop_req_t (snoop_req_t), + .snoop_resp_t (snoop_resp_t) +) ccu_ctrl_memory_unit_i ( + .clk_i, + .rst_ni, + + .ccu_req_i (mu_ccu_req), + .ccu_resp_o (mu_ccu_resp), + + .ccu_req_o, + .ccu_resp_i, + + .cd_i (cd), + .cd_valid_i (cd_valid), + .cd_ready_o (mu_cd_ready), + .cd_busy_o (mu_cd_busy), + + .ccu_req_holder_i (dec_ccu_req_holder), + .mu_ready_o (mu_ready), + .mu_valid_i (mu_valid), + .mu_op_i (mu_op), + .data_available_i (data_available), + .first_responder_i (dec_first_responder) +); + + logic [1:0] r_valid_in, r_ready_in; + mst_r_chan_t [1:0] r_chans_in; + + mst_r_chan_t r_chan_out; + logic r_valid_out, r_ready_out; + + always_comb begin + mu_ccu_req = ccu_req_i; + + r_valid_in = {mu_ccu_resp.r_valid, su_r_valid}; + r_chans_in = {mu_ccu_resp.r, su_r}; + {mu_ccu_req.r_ready, su_r_ready} = r_ready_in; + end + + rr_arb_tree #( + .NumIn ( 2 ), + .DataType ( mst_r_chan_t ), + .AxiVldRdy( 1'b1 ), + .LockIn ( 1'b1 ) + ) r_arbiter_i ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i( 1'b0 ), + .rr_i ( '0 ), + .req_i ( r_valid_in ), + .gnt_o ( r_ready_in ), + .data_i ( r_chans_in ), + .gnt_i ( r_ready_out ), + .req_o ( r_valid_out ), + .data_o ( r_chan_out ), + .idx_o ( ) + ); + + + +always_comb begin + // Resp + ccu_resp_o = mu_ccu_resp; + + ccu_resp_o.r = r_chan_out; + ccu_resp_o.r_valid = r_valid_out; + r_ready_out = ccu_req_i.r_ready; + + ccu_resp_o.ar_ready = ccu_ar_ready; + ccu_resp_o.aw_ready = ccu_aw_ready; + + // Snoop + for (int unsigned i = 0; i < NoMstPorts; i++) begin + 
s2m_req_o[i] = '0; + s2m_req_o[i].ac = dec_snoop_req[i].ac; + s2m_req_o[i].ac_valid = dec_snoop_req[i].ac_valid; + s2m_req_o[i].cr_ready = dec_snoop_req[i].cr_ready; + s2m_req_o[i].cd_ready = su_cd_ready[i] || mu_cd_ready[i]; // TODO arb tree + end +end + +endmodule diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv new file mode 100644 index 0000000..3dd6c68 --- /dev/null +++ b/src/ccu_ctrl_decoder.sv @@ -0,0 +1,360 @@ +module ccu_ctrl_decoder import ccu_ctrl_pkg::*; +#( + parameter int unsigned DcacheLineWidth = 0, + parameter int unsigned AxiDataWidth = 0, + parameter int unsigned NoMstPorts = 4, + parameter int unsigned SlvAxiIDWidth = 0, + parameter type mst_aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type mst_b_chan_t = logic, + parameter type mst_ar_chan_t = logic, + parameter type mst_r_chan_t = logic, + parameter type mst_req_t = logic, + parameter type mst_resp_t = logic, + parameter type snoop_ac_t = logic, + parameter type snoop_cr_t = logic, + parameter type snoop_cd_t = logic, + parameter type snoop_req_t = logic, + parameter type snoop_resp_t = logic, + localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth, + localparam int unsigned MstIdxBits = $clog2(NoMstPorts) +) ( + //clock and reset + input clk_i, + input rst_ni, + // CCU Request in + input mst_req_t ccu_req_i, + // Snoop channel request and response + output snoop_req_t [NoMstPorts-1:0] s2m_req_o, + input snoop_resp_t [NoMstPorts-1:0] m2s_resp_i, + + output logic slv_aw_ready_o, + output logic slv_ar_ready_o, + + output mst_req_t ccu_req_holder_o, + + output logic su_valid_o, + input logic su_ready_i, + output logic mu_valid_o, + input logic mu_ready_i, + + output mu_op_e mu_op_o, + output su_op_e su_op_o, + output logic shared_o, + output logic dirty_o, + output logic [NoMstPorts-1:0] data_available_o, + output logic [MstIdxBits-1:0] first_responder_o +); + + logic [NoMstPorts-1:0] initiator_d, initiator_q; + logic [NoMstPorts-1:0] 
ac_handshake_q, ac_handshake; + logic [NoMstPorts-1:0] cr_handshake_q, cr_handshake; + + enum { + IDLE, + SEND_READ, + SEND_INVALID_R, + SEND_INVALID_W, + WAIT_RESP_R, + WAIT_INVALID_R, + WAIT_INVALID_W + } state_d, state_q; + + typedef struct packed { + logic waiting_w; + logic waiting_r; + } prio_t; + + prio_t prio_d, prio_q; + + logic prio_r, prio_w; + + assign prio_r = !ccu_req_i.aw_valid || prio_q.waiting_r || !prio_q.waiting_w; + assign prio_w = !ccu_req_i.ar_valid || prio_q.waiting_w; + + logic decode_r, decode_w; + + logic send_invalid_r; + + assign send_invalid_r = ccu_req_i.ar.snoop == snoop_pkg::CLEAN_UNIQUE || ccu_req_i.ar.lock; + + for (genvar i = 0; i < NoMstPorts; i = i + 1) begin + assign ac_handshake[i] = m2s_resp_i[i].ac_ready & s2m_req_o[i].ac_valid; + assign cr_handshake[i] = m2s_resp_i[i].cr_valid & s2m_req_o[i].cr_ready; + end + + // Hold incoming ACE request + mst_req_t ccu_req_holder_q; + + always_ff @(posedge clk_i , negedge rst_ni) begin + if(!rst_ni) begin + ccu_req_holder_q <= '0; + end else if(decode_r) begin + ccu_req_holder_q.ar <= ccu_req_i.ar; + ccu_req_holder_q.ar_valid <= ccu_req_i.ar_valid; + ccu_req_holder_q.r_ready <= ccu_req_i.r_ready; + end else if(decode_w) begin + ccu_req_holder_q.aw <= ccu_req_i.aw; + ccu_req_holder_q.aw_valid <= ccu_req_i.aw_valid; + end + end + + assign ccu_req_holder_o = ccu_req_holder_q; + + // Hold snoop AC handshakes + for (genvar i = 0; i < NoMstPorts; i = i + 1) begin + always_ff @ (posedge clk_i, negedge rst_ni) begin + if(!rst_ni) begin + ac_handshake_q[i] <= '0; + end else if(decode_r || decode_w) begin + ac_handshake_q[i] <= initiator_d[i]; + end else if(state_q inside {SEND_READ, SEND_INVALID_R, SEND_INVALID_W}) begin + if (ac_handshake[i]) + ac_handshake_q[i] <= 1'b1; + end else begin + ac_handshake_q[i] <= '0; + end + end + end + + // Hold snoop CR handshakes + logic [NoMstPorts-1:0] data_available_q, response_error_q, shared_q, dirty_q; + always_ff @ (posedge clk_i, negedge rst_ni) begin + 
if(!rst_ni) begin + cr_handshake_q <= '0; + data_available_q <= '0; + shared_q <= '0; + dirty_q <= '0; + response_error_q <= '0; + end else if(state_q == IDLE) begin + cr_handshake_q <= '0; + data_available_q <= '0; + shared_q <= '0; + dirty_q <= '0; + response_error_q <= '0; + end else if(state_q inside {SEND_READ, SEND_INVALID_R, SEND_INVALID_W}) begin + cr_handshake_q <= initiator_q; + end else begin + for (int i = 0; i < NoMstPorts; i = i + 1) begin + if(cr_handshake[i]) begin + cr_handshake_q[i] <= 1'b1; + data_available_q[i] <= m2s_resp_i[i].cr_resp.dataTransfer; + shared_q[i] <= m2s_resp_i[i].cr_resp.isShared; + dirty_q[i] <= m2s_resp_i[i].cr_resp.passDirty; + response_error_q[i] <= m2s_resp_i[i].cr_resp.error; + end + end + end + end + + assign dirty_o = |dirty_q; + assign shared_o = |shared_q; + assign data_available_o = data_available_q; + + logic [MstIdxBits-1:0] first_responder_q; + logic snoop_resp_found_q; + + always_ff @ (posedge clk_i, negedge rst_ni) begin + if(!rst_ni) begin + first_responder_q <= '0; + snoop_resp_found_q <= 1'b0; + end else if(state_q == IDLE) begin + first_responder_q <= '0; + snoop_resp_found_q <= 1'b0; + end else if (!snoop_resp_found_q) begin + for (int i = 0; i < NoMstPorts; i = i + 1) begin + if(cr_handshake[i] & m2s_resp_i[i].cr_resp.dataTransfer & !m2s_resp_i[i].cr_resp.error) begin + first_responder_q <= i[MstIdxBits-1:0]; + snoop_resp_found_q <= 1'b1; + break; + end + end + end + end + + assign first_responder_o = first_responder_q; + + + localparam Legacy = 1; + + + // ---------------------- + // Current State Block + // ---------------------- + always_ff @(posedge clk_i, negedge rst_ni) begin : ccu_present_state + if(!rst_ni) begin + state_q <= IDLE; + initiator_q <= '0; + prio_q <= '0; + end else begin + state_q <= state_d; + initiator_q <= initiator_d; + prio_q <= prio_d; + end + end + + // ---------------------- + // Next State and Output Block + // ---------------------- + + always_comb begin + + // Next state + state_d = 
state_q; + initiator_d = initiator_q; + prio_d = prio_q; + + // Output + s2m_req_o = '0; + + slv_ar_ready_o = '0; + slv_aw_ready_o = '0; + + su_valid_o = 1'b0; + mu_valid_o = 1'b0; + su_op_o = READ_SNP_DATA; + mu_op_o = SEND_AXI_REQ_R; + + // Ctrl flags + decode_r = 1'b0; + decode_w = 1'b0; + + case (state_q) + IDLE: begin + + initiator_d = '0; + prio_d = '0; + if (!Legacy || (mu_ready_i && su_ready_i)) begin + // wait for incoming valid request from master + if(ccu_req_i.ar_valid & prio_r) begin + decode_r = 1'b1; + state_d = send_invalid_r ? SEND_INVALID_R : SEND_READ; + initiator_d[ccu_req_i.ar.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; + prio_d.waiting_w = ccu_req_i.aw_valid; + end else if(ccu_req_i.aw_valid & prio_w) begin + decode_w = 1'b1; + state_d = SEND_INVALID_W; + initiator_d[ccu_req_i.aw.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; + prio_d.waiting_r = ccu_req_i.ar_valid; + end + + slv_ar_ready_o = prio_r; + slv_aw_ready_o = prio_w; + end + end + + SEND_READ: begin + // wait for all snoop masters to perform an handshake + if (ac_handshake_q == '1) begin + state_d = WAIT_RESP_R; + end + // send request to snooping masters + for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin + s2m_req_o[n].ac.addr = ccu_req_holder_q.ar.addr; + s2m_req_o[n].ac.prot = ccu_req_holder_q.ar.prot; + s2m_req_o[n].ac.snoop = ccu_req_holder_q.ar.snoop; + s2m_req_o[n].ac_valid = !ac_handshake_q[n]; + end + end + + SEND_INVALID_R: begin + // wait for all snoop masters to perform an handshake + if (ac_handshake_q == '1) begin + state_d = WAIT_INVALID_R; + end + // send request to snooping masters + for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin + s2m_req_o[n].ac.addr = ccu_req_holder_q.ar.addr; + s2m_req_o[n].ac.prot = ccu_req_holder_q.ar.prot; + s2m_req_o[n].ac.snoop = snoop_pkg::CLEAN_INVALID; + s2m_req_o[n].ac_valid = !ac_handshake_q[n]; + end + end + + SEND_INVALID_W: begin + // wait for all snoop masters to perform an handshake + if (ac_handshake_q == '1) begin + 
state_d = WAIT_INVALID_W; + end + // send request to snooping masters + for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin + s2m_req_o[n].ac.addr = ccu_req_holder_q.aw.addr; + s2m_req_o[n].ac.prot = ccu_req_holder_q.aw.prot; + s2m_req_o[n].ac.snoop = snoop_pkg::CLEAN_INVALID; + s2m_req_o[n].ac_valid = !ac_handshake_q[n]; + end + end + + WAIT_RESP_R: begin + // wait for all CR handshakes + if (cr_handshake_q == '1) begin + + if(|(data_available_q & ~response_error_q)) begin + su_op_o = READ_SNP_DATA; + su_valid_o = 1'b1; + if (su_ready_i) begin + state_d = IDLE; + end + end else begin + mu_op_o = SEND_AXI_REQ_R; + mu_valid_o = 1'b1; + if (mu_ready_i) begin + state_d = IDLE; + end + end + end + + for (int unsigned n = 0; n < NoMstPorts; n = n + 1) + s2m_req_o[n].cr_ready = !cr_handshake_q[n]; + end + + WAIT_INVALID_R: begin + // wait for all CR handshakes + if (cr_handshake_q == '1 && (ccu_req_i.r_ready || ccu_req_holder_q.ar.lock)) begin + + if (mu_ready_i && (ccu_req_holder_q.ar.lock || su_ready_i)) begin + state_d = IDLE; + end + + if(|(data_available_q & ~response_error_q)) begin + mu_op_o = SEND_AXI_REQ_WRITE_BACK_R; + mu_valid_o = 1'b1; + end else if (ccu_req_holder_q.ar.lock) begin + mu_op_o = SEND_AXI_REQ_R; + mu_valid_o = 1'b1; + end + end + + if (cr_handshake_q == '1 && !ccu_req_holder_q.ar.lock) begin + su_op_o = SEND_INVALID_ACK_R; + su_valid_o = 1'b1; + end + + for (int unsigned n = 0; n < NoMstPorts; n = n + 1) + s2m_req_o[n].cr_ready = !cr_handshake_q[n]; + end + + WAIT_INVALID_W: begin + // wait for all CR handshakes + if (cr_handshake_q == '1) begin + + mu_valid_o = 1'b1; + + if (mu_ready_i) begin + state_d = IDLE; + end + + if(|(data_available_q & ~response_error_q)) begin + mu_op_o = SEND_AXI_REQ_WRITE_BACK_W; + end else begin + mu_op_o = SEND_AXI_REQ_W; + end + end + + for (int unsigned n = 0; n < NoMstPorts; n = n + 1) + s2m_req_o[n].cr_ready = !cr_handshake_q[n]; + end + endcase + end + +endmodule \ No newline at end of file diff --git 
a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv new file mode 100644 index 0000000..fb89783 --- /dev/null +++ b/src/ccu_ctrl_memory_unit.sv @@ -0,0 +1,342 @@ +module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; +#( + parameter int unsigned DcacheLineWidth = 0, + parameter int unsigned AxiDataWidth = 0, + parameter int unsigned NoMstPorts = 4, + parameter int unsigned SlvAxiIDWidth = 0, + parameter type mst_aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type mst_b_chan_t = logic, + parameter type mst_ar_chan_t = logic, + parameter type mst_r_chan_t = logic, + parameter type mst_req_t = logic, + parameter type mst_resp_t = logic, + parameter type snoop_ac_t = logic, + parameter type snoop_cr_t = logic, + parameter type snoop_cd_t = logic, + parameter type snoop_req_t = logic, + parameter type snoop_resp_t = logic, + localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth, + localparam int unsigned MstIdxBits = $clog2(NoMstPorts) +) ( + //clock and reset + input clk_i, + input rst_ni, + // CCU Request In and response out + input mst_req_t ccu_req_i, + output mst_resp_t ccu_resp_o, + //CCU Request Out and response in + output mst_req_t ccu_req_o, + input mst_resp_t ccu_resp_i, + + input snoop_cd_t [NoMstPorts-1:0] cd_i, + input logic [NoMstPorts-1:0] cd_valid_i, + output logic [NoMstPorts-1:0] cd_ready_o, + output logic cd_busy_o, + + input mst_req_t ccu_req_holder_i, + output logic mu_ready_o, + input logic mu_valid_i, + input mu_op_e mu_op_i, + input logic [NoMstPorts-1:0] data_available_i, + input logic [MstIdxBits-1:0] first_responder_i +); + +localparam FIFO_DEPTH = 2; + +mst_req_t ccu_req_holder_q; +logic [MstIdxBits-1:0] first_responder_q, fifo_first_responder_q, fifo_first_responder_d; +logic [NoMstPorts-1:0] data_available_q; + +logic sample_dec_data; + +always_ff @(posedge clk_i , negedge rst_ni) begin + if(!rst_ni) begin + ccu_req_holder_q <= '0; + first_responder_q <= '0; + data_available_q <= '0; + end else 
if (sample_dec_data) begin + ccu_req_holder_q <= ccu_req_holder_i; + first_responder_q <= first_responder_i; + data_available_q <= data_available_i; + end +end + +enum {Ax_IDLE, Ax_BUSY} ax_state_q, ax_state_d; +mu_op_e ax_op_q, ax_op_d; + +always_ff @(posedge clk_i , negedge rst_ni) begin + if(!rst_ni) begin + ax_state_q <= Ax_IDLE; + ax_op_q <= SEND_AXI_REQ_R; + end else begin + ax_state_q <= ax_state_d; + ax_op_q <= ax_op_d; + end +end + +mst_ar_chan_t ar_out; +mst_aw_chan_t aw_out; + +logic ar_valid_out, aw_valid_out; + +logic cd_data_incoming; + +localparam Legacy = 1; + +always_comb begin + mu_ready_o = 1'b0; + ax_state_d = ax_state_q; + ax_op_d = ax_op_q; + + sample_dec_data = 1'b0; + + ar_out = '0; + aw_out = '0; + ar_valid_out = 1'b0; + aw_valid_out = 1'b0; + + cd_data_incoming = 1'b0; + + case (ax_state_q) + Ax_IDLE: begin + mu_ready_o = 1'b1; + if (mu_valid_i) begin + sample_dec_data = 1'b1; + ax_op_d = mu_op_i; + ax_state_d = Ax_BUSY; + end + end + Ax_BUSY: begin + case (ax_op_q) + SEND_AXI_REQ_R: begin + // If a lock is present, wait for W to complete + if (!ccu_req_holder_q.ar.lock || ccu_resp_i.w_ready) begin + ar_valid_out = 'b1; + ar_out = ccu_req_holder_q.ar; + if (ccu_resp_i.ar_ready) begin + if (Legacy) + ax_op_d = LEGACY_WAIT_READ; + else + ax_state_d = Ax_IDLE; + end + end + end + SEND_AXI_REQ_WRITE_BACK_R: begin + cd_data_incoming = 1'b1; + // send writeback request + aw_valid_out = 'b1; + aw_out = '0; //default + aw_out.addr = ccu_req_holder_q.ar.addr; + aw_out.addr[3:0] = 4'b0; // writeback is always full cache line + aw_out.size = 2'b11; + aw_out.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction + aw_out.id = {first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. 
for AMO operations + aw_out.len = DcacheLineWords-1; + // WRITEBACK + aw_out.domain = 2'b00; + aw_out.snoop = 3'b011; + if (ccu_resp_i.aw_ready) begin + if (ccu_req_holder_q.ar.lock) + ax_op_d = SEND_AXI_REQ_R; + else if (Legacy) + ax_op_d = LEGACY_WAIT_WB; + else + ax_state_d = Ax_IDLE; + end + end + SEND_AXI_REQ_W: begin + aw_valid_out = 'b1; + aw_out = ccu_req_holder_q.aw; + if (ccu_resp_i.aw_ready) begin + if (Legacy) + ax_op_d = LEGACY_WAIT_WRITE; + else + ax_state_d = Ax_IDLE; + end + end + SEND_AXI_REQ_WRITE_BACK_W: begin + cd_data_incoming = 1'b1; + // send writeback request + aw_valid_out = 'b1; + aw_out = '0; //default + aw_out.addr = ccu_req_holder_q.aw.addr; + aw_out.addr[3:0] = 4'b0; // writeback is always full cache line + aw_out.size = 2'b11; + aw_out.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction + aw_out.id = {first_responder_q, ccu_req_holder_q.aw.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. 
for AMO operations + aw_out.len = DcacheLineWords-1; + // WRITEBACK + aw_out.domain = 2'b00; + aw_out.snoop = 3'b011; + if (ccu_resp_i.aw_ready) begin + if (Legacy) + ax_op_d = LEGACY_WAIT_WB; + else + ax_state_d = Ax_IDLE; + end + end + LEGACY_WAIT_WRITE: begin + if(ccu_resp_i.b_valid && ccu_req_i.b_ready) + ax_state_d = Ax_IDLE; + end + LEGACY_WAIT_READ: begin + if(ccu_resp_i.r_valid && ccu_req_i.r_ready && ccu_resp_i.r.last) + ax_state_d = Ax_IDLE; + end + LEGACY_WAIT_WB: begin + if(ccu_resp_i.b_valid && ccu_req_o.b_ready) + ax_state_d = Ax_IDLE; + end + endcase + end + endcase +end + +logic [AxiDataWidth-1:0] fifo_data_in, fifo_data_out; +logic [$clog2(DcacheLineWords)-1:0] fifo_usage; + +enum { FIFO_IDLE, FIFO_LOWER_HALF, FIFO_UPPER_HALF, FIFO_WAIT } fifo_state_q, fifo_state_d; + +logic w_busy_d, w_busy_q; +logic w_last_d, w_last_q; + +always_ff @(posedge clk_i or negedge rst_ni) begin + if(!rst_ni) begin + fifo_state_q <= FIFO_IDLE; + w_busy_q <= 1'b0; + fifo_first_responder_q <= '0; + w_last_q <= 1'b0; + end else begin + fifo_state_q <= fifo_state_d; + w_busy_q <= w_busy_d; + fifo_first_responder_q <= fifo_first_responder_d; + w_last_q <= w_last_d; + end +end + +logic fifo_push, fifo_flush, fifo_pop, fifo_full, fifo_empty; + +always_comb begin + fifo_state_d = fifo_state_q; + fifo_first_responder_d = fifo_first_responder_q; + + case (fifo_state_q) + FIFO_IDLE: begin + if (cd_data_incoming) begin + fifo_state_d = FIFO_LOWER_HALF; + fifo_first_responder_d = first_responder_q; + end + end + FIFO_LOWER_HALF: begin + if(cd_valid_i[fifo_first_responder_q] && cd_ready_o[fifo_first_responder_q]) begin + fifo_state_d = FIFO_UPPER_HALF; + end + end + FIFO_UPPER_HALF: begin + if(cd_valid_i[fifo_first_responder_q] && cd_ready_o[fifo_first_responder_q]) begin + fifo_state_d = FIFO_WAIT; + end + end + FIFO_WAIT: begin + if (ccu_resp_i.b_valid && ccu_req_o.b_ready) + fifo_state_d = FIFO_IDLE; + end + endcase + +end + +assign cd_busy_o = fifo_state_q != FIFO_IDLE; +assign 
fifo_push = cd_busy_o && cd_valid_i[fifo_first_responder_q] && cd_ready_o[fifo_first_responder_q]; +assign fifo_flush = !cd_busy_o; +assign fifo_data_in = cd_i[fifo_first_responder_q].data; +assign fifo_pop = w_busy_q ? '0 : ccu_resp_i.w_ready && ccu_req_o.w_valid; + + + fifo_v3 #( + .FALL_THROUGH(0), + .DATA_WIDTH(AxiDataWidth), + .DEPTH(FIFO_DEPTH) + ) cd_memory_fifo_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (fifo_flush), + .testmode_i (1'b0), + .full_o (fifo_full), + .empty_o (fifo_empty), + .usage_o (fifo_usage), + .data_i (fifo_data_in), + .push_i (fifo_push), + .data_o (fifo_data_out), + .pop_i (fifo_pop) +); + +logic [NoMstPorts-1:0] cd_last_q; + +for (genvar i = 0; i < NoMstPorts; i = i + 1) begin + always_ff @ (posedge clk_i, negedge rst_ni) begin + if(!rst_ni) begin + cd_last_q[i] <= '0; + end else if(!cd_busy_o) begin + cd_last_q[i] <= '0; + end else if(cd_valid_i[i]) begin + cd_last_q[i] <= (cd_i[i].last & data_available_q[i]); + end + end +end + +always_comb begin + cd_ready_o = '0; + + if (cd_busy_o) begin + for (int i = 0; i < NoMstPorts; i = i + 1) begin + cd_ready_o[i] = !cd_last_q[i] && data_available_q[i]; + end + + if (fifo_full) begin + cd_ready_o[fifo_first_responder_q] = 1'b0; + end + end + +end + +// AR +assign ccu_req_o.ar = ar_out; +assign ccu_req_o.ar_valid = ar_valid_out; + +// AW +assign ccu_req_o.aw = aw_out; +assign ccu_req_o.aw_valid = aw_valid_out; + +// R passthrough +assign ccu_resp_o.r = ccu_resp_i.r; +assign ccu_resp_o.r_valid = ccu_resp_i.r_valid; +assign ccu_req_o.r_ready = ccu_req_i.r_ready; + +always_comb begin + + w_busy_d = 1'b0; + w_last_d = 1'b0; + + // W and B + // Connect the FIFO as long as the transmission is ongoing + if (cd_busy_o && !w_busy_q) begin + w_last_d = ccu_resp_i.w_ready && !fifo_empty; + ccu_req_o.w_valid = !fifo_empty; + ccu_req_o.w.strb = '1; + ccu_req_o.w.data = fifo_data_out; + ccu_req_o.w.last = w_last_q; + ccu_req_o.b_ready = 'b1; + end else begin + w_busy_d = (ccu_req_i.w_valid && 
!ccu_resp_i.w_ready) || (w_busy_q && !(ccu_resp_i.b_valid && ccu_req_i.b_ready)); + ccu_req_o.w = ccu_req_i.w; + ccu_req_o.w_valid = ccu_req_i.w_valid; + ccu_req_o.b_ready = ccu_req_i.b_ready; + + ccu_resp_o.b = ccu_resp_i.b; + ccu_resp_o.b_valid = ccu_resp_i.b_valid; + ccu_resp_o.w_ready = ccu_resp_i.w_ready; + end +end + +endmodule \ No newline at end of file diff --git a/src/ccu_ctrl_pkg.sv b/src/ccu_ctrl_pkg.sv new file mode 100644 index 0000000..ee25ef1 --- /dev/null +++ b/src/ccu_ctrl_pkg.sv @@ -0,0 +1,18 @@ +package ccu_ctrl_pkg; + + typedef enum logic [3:0] { + SEND_AXI_REQ_R, + SEND_AXI_REQ_WRITE_BACK_R, + SEND_AXI_REQ_W, + SEND_AXI_REQ_WRITE_BACK_W, + LEGACY_WAIT_WRITE, + LEGACY_WAIT_READ, + LEGACY_WAIT_WB + } mu_op_e; + + typedef enum logic { + READ_SNP_DATA, + SEND_INVALID_ACK_R + } su_op_e; + +endpackage \ No newline at end of file diff --git a/src/ccu_ctrl_snoop_unit.sv b/src/ccu_ctrl_snoop_unit.sv new file mode 100644 index 0000000..173d066 --- /dev/null +++ b/src/ccu_ctrl_snoop_unit.sv @@ -0,0 +1,265 @@ +module ccu_ctrl_snoop_unit import ccu_ctrl_pkg::*; +#( + parameter int unsigned DcacheLineWidth = 0, + parameter int unsigned AxiDataWidth = 0, + parameter int unsigned NoMstPorts = 4, + parameter int unsigned SlvAxiIDWidth = 0, + parameter type mst_aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type mst_b_chan_t = logic, + parameter type mst_ar_chan_t = logic, + parameter type mst_r_chan_t = logic, + parameter type mst_req_t = logic, + parameter type mst_resp_t = logic, + parameter type snoop_ac_t = logic, + parameter type snoop_cr_t = logic, + parameter type snoop_cd_t = logic, + parameter type snoop_req_t = logic, + parameter type snoop_resp_t = logic, + localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth, + localparam int unsigned MstIdxBits = $clog2(NoMstPorts) +) ( + //clock and reset + input clk_i, + input rst_ni, + // CCU Request In and response out + output mst_r_chan_t r_o, + output logic r_valid_o, 
+ input logic r_ready_i, + + input snoop_cd_t [NoMstPorts-1:0] cd_i, + input logic [NoMstPorts-1:0] cd_valid_i, + output logic [NoMstPorts-1:0] cd_ready_o, + output logic cd_busy_o, + + input mst_req_t ccu_req_holder_i, + output logic su_ready_o, + input logic su_valid_i, + input su_op_e su_op_i, + input logic shared_i, + input logic dirty_i, + input logic [NoMstPorts-1:0] data_available_i, + input logic [MstIdxBits-1:0] first_responder_i +); + +localparam FIFO_DEPTH = 2; + +enum { + IDLE, + SEND_LOWER_HALF, + SEND_UPPER_HALF, + WAIT_R_READY, + WAIT_CD_LAST +} state_d, state_q; + +logic [AxiDataWidth-1:0] fifo_data_in, fifo_data_out; +logic [$clog2(DcacheLineWords)-1:0] fifo_usage; + +logic sample_dec_data; + +mst_req_t ccu_req_holder_q; +logic shared_q; +logic dirty_q; +logic [MstIdxBits-1:0] first_responder_q; +logic [NoMstPorts-1:0] data_available_q; + +always_ff @(posedge clk_i , negedge rst_ni) begin + if(!rst_ni) begin + ccu_req_holder_q <= '0; + shared_q <= '0; + dirty_q <= '0; + first_responder_q <= '0; + data_available_q <= '0; + end else if(sample_dec_data) begin + ccu_req_holder_q <= ccu_req_holder_i; + shared_q <= shared_i; + dirty_q <= dirty_i; + first_responder_q <= first_responder_i; + data_available_q <= data_available_i; + end +end + +always_ff @(posedge clk_i , negedge rst_ni) begin + if(!rst_ni) begin + state_q <= IDLE; + end else begin + state_q <= state_d; + end +end + +logic ar_addr_offset; + +assign ar_addr_offset = ccu_req_holder_q.ar.addr[3]; + +logic fifo_full, fifo_empty, fifo_push, fifo_pop, fifo_flush; // cd FIFO status/control; fifo_flush declared explicitly instead of relying on an implicit net + +logic [NoMstPorts-1:0] cd_last_q; + +always_comb begin + + state_d = state_q; + + su_ready_o = 1'b0; + + r_o = '0; + r_valid_o = 1'b0; + + fifo_pop = 1'b0; + + sample_dec_data = 1'b0; + + case (state_q) + IDLE: begin + su_ready_o = 1'b1; + if (su_valid_i) begin + if (su_op_i == SEND_INVALID_ACK_R) begin + r_o = '0; + r_o.id = ccu_req_holder_i.ar.id; + r_o.last = 'b1; + r_valid_o = 'b1; + if (!r_ready_i) + state_d = WAIT_R_READY; + end else if 
(su_op_i == READ_SNP_DATA) begin + sample_dec_data = 1'b1; + state_d = SEND_LOWER_HALF; + end + end + end + + SEND_LOWER_HALF: begin + // Prepare request + r_o.data = fifo_data_out; + r_o.id = ccu_req_holder_q.ar.id; + r_o.resp[3] = shared_q; // update if shared + r_o.resp[2] = dirty_q; // update if any line dirty + + if (!fifo_empty) begin + // Single data request + if (ccu_req_holder_q.ar.len == 0) begin + // The lower 64 bits are required + if (!ar_addr_offset) begin + r_o.last = 1'b1; + r_valid_o = 1'b1; // There is something to send + if (r_ready_i) begin + state_d = WAIT_CD_LAST; + fifo_pop = 1'b1; + end + end else begin + // The lower 64 bits are not needed + // Consume them and move the upper 64 bits + state_d = SEND_UPPER_HALF; + fifo_pop = 1'b1; + end + end else begin + // Full cacheline request + r_o.last = 1'b0; + r_valid_o = 1'b1; // There is something to send + if (r_ready_i) begin + state_d = SEND_UPPER_HALF; + fifo_pop = 1'b1; + end + end + end + end + + SEND_UPPER_HALF: begin + // Prepare request + r_o.data = fifo_data_out; + r_o.id = ccu_req_holder_q.ar.id; + r_o.resp[3] = shared_q; // Update if shared + r_o.resp[2] = dirty_q; // Update if any line dirty + r_o.last = 1'b1; // No further transactions + + if (!fifo_empty) begin + // // Single data request + // if (ccu_req_holder_q.ar.len == 0) begin + // // The upper 64 bit are required + // if (ar_addr_offset) begin + // r_valid_o = 1'b1; // There is something to send + // end + // end else begin + // // Full cacheline request + // r_valid_o = 1'b1; // There is something to send + // end + + r_valid_o = 1'b1; + + if (r_ready_i) begin + fifo_pop = 1'b1; + state_d = (cd_last_q == data_available_q) ? 
IDLE : WAIT_CD_LAST; + end + end + end + + WAIT_R_READY: begin + + r_o = '0; + r_o.id = ccu_req_holder_i.ar.id; + r_o.last = 'b1; + r_valid_o = 'b1; + + if (r_ready_i) + state_d = IDLE; + end + + WAIT_CD_LAST: begin + if (cd_last_q == data_available_q) + state_d = IDLE; + end + endcase +end + +assign cd_busy_o = !(state_q inside {IDLE, WAIT_R_READY}); + +assign fifo_push = cd_busy_o && cd_valid_i[first_responder_q] && cd_ready_o[first_responder_q]; +assign fifo_flush = !cd_busy_o; +assign fifo_data_in = cd_i[first_responder_q].data; + + + fifo_v3 #( + .FALL_THROUGH(0), + .DATA_WIDTH(AxiDataWidth), + .DEPTH(FIFO_DEPTH) + ) cd_snoop_fifo_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (fifo_flush), + .testmode_i (1'b0), + .full_o (fifo_full), + .empty_o (fifo_empty), + .usage_o (fifo_usage), + .data_i (fifo_data_in), + .push_i (fifo_push), + .data_o (fifo_data_out), + .pop_i (fifo_pop) +); + + +for (genvar i = 0; i < NoMstPorts; i = i + 1) begin + always_ff @ (posedge clk_i, negedge rst_ni) begin + if(!rst_ni) begin + cd_last_q[i] <= '0; + end else if(!cd_busy_o) begin + cd_last_q[i] <= '0; + end else if(cd_valid_i[i]) begin + cd_last_q[i] <= (cd_i[i].last & data_available_q[i]); + end + end +end + +always_comb begin + cd_ready_o = '0; + + if (cd_busy_o) begin + for (int i = 0; i < NoMstPorts; i = i + 1) begin + cd_ready_o[i] = !cd_last_q[i] && data_available_q[i]; + end + + if (fifo_full) begin + cd_ready_o[first_responder_q] = 1'b0; + end + end + +end + +endmodule diff --git a/src/ccu_fsm.sv b/src/ccu_fsm.sv deleted file mode 100644 index f1cfe87..0000000 --- a/src/ccu_fsm.sv +++ /dev/null @@ -1,705 +0,0 @@ -// Copyright 2022 ETH Zurich and University of Bologna. -// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
-// SPDX-License-Identifier: SHL-0.51 - -`include "ace/assign.svh" -`include "ace/typedef.svh" - -module ccu_fsm -#( - parameter int unsigned DcacheLineWidth = 0, - parameter int unsigned AxiDataWidth = 0, - parameter int unsigned NoMstPorts = 4, - parameter int unsigned SlvAxiIDWidth = 0, - parameter type mst_req_t = logic, - parameter type mst_resp_t = logic, - parameter type snoop_req_t = logic, - parameter type snoop_resp_t = logic -) ( - //clock and reset - input clk_i, - input rst_ni, - // CCU Request In and response out - input mst_req_t ccu_req_i, - output mst_resp_t ccu_resp_o, - //CCU Request Out and response in - output mst_req_t ccu_req_o, - input mst_resp_t ccu_resp_i, - // Snoop channel resuest and response - output snoop_req_t [NoMstPorts-1:0] s2m_req_o, - input snoop_resp_t [NoMstPorts-1:0] m2s_resp_i -); - - localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth; - localparam int unsigned MstIdxBits = $clog2(NoMstPorts); - - enum logic [5:0] { - IDLE, // 0 - DECODE_R, // 1 - SEND_INVALID_R, // 2 - WAIT_INVALID_R, // 3 - SEND_AXI_REQ_WRITE_BACK_R, // 4 - WRITE_BACK_MEM_R, // 5 - SEND_READ, // 6 - WAIT_RESP_R, // 7 - READ_SNP_DATA, // 8 - SEND_AXI_REQ_R, // 9 - READ_MEM, // 10 - DECODE_W, // 11 - SEND_INVALID_W, // 12 - WAIT_INVALID_W, // 13 - SEND_AXI_REQ_WRITE_BACK_W, // 14 - WRITE_BACK_MEM_W, // 15 - SEND_AXI_REQ_W, // 16 - WRITE_MEM // 17 - } state_d, state_q; - - - // snoop resoponse valid - logic [NoMstPorts-1:0] cr_valid; - // snoop channel ac valid - logic [NoMstPorts-1:0] ac_valid; - // snoop channel ac ready - logic [NoMstPorts-1:0] ac_ready; - // snoop channel cd last - logic [NoMstPorts-1:0] cd_last; - // check for availablilty of data - logic [NoMstPorts-1:0] data_available; - // check for response error - logic [NoMstPorts-1:0] response_error; - // check for data received - logic [NoMstPorts-1:0] data_received; - // check for shared in cr_resp - logic [NoMstPorts-1:0] shared; - // check for dirty in cr_resp - logic 
[NoMstPorts-1:0] dirty; - // request holder - mst_req_t ccu_req_holder; - // response holder - mst_resp_t ccu_resp_holder; - // snoop response holder - snoop_resp_t [NoMstPorts-1:0] m2s_resp_holder; - // initiating master port - logic [NoMstPorts-1:0] initiator_d, initiator_q; - logic [MstIdxBits-1:0] first_responder; - - logic [DcacheLineWords-1:0][AxiDataWidth-1:0] cd_data; - logic [$clog2(DcacheLineWords+1)-1:0] stored_cd_data; - - logic r_last; - logic w_last; - logic r_eot; - logic w_eot; - - typedef struct packed { - logic waiting_w; - logic waiting_r; - } prio_t; - - prio_t prio_d, prio_q; - - // ---------------------- - // Current State Block - // ---------------------- - always_ff @(posedge clk_i, negedge rst_ni) begin : ccu_present_state - if(!rst_ni) begin - state_q <= IDLE; - initiator_q <= '0; - prio_q <= '0; - end else begin - state_q <= state_d; - initiator_q <= initiator_d; - prio_q <= prio_d; - end - end - - // ---------------------- - // Next State Block - // ---------------------- - always_comb begin : ccu_state_ctrl - - state_d = state_q; - initiator_d = initiator_q; - prio_d = prio_q; - - case(state_q) - - IDLE: begin - initiator_d = '0; - prio_d = '0; - // wait for incoming valid request from master - if((ccu_req_i.ar_valid & !ccu_req_i.aw_valid) | - (ccu_req_i.ar_valid & prio_q.waiting_r) | - (ccu_req_i.ar_valid & !prio_q.waiting_w)) begin - state_d = DECODE_R; - initiator_d[ccu_req_i.ar.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; - prio_d.waiting_w = ccu_req_i.aw_valid; - end else if((ccu_req_i.aw_valid & !ccu_req_i.ar_valid) | - (ccu_req_i.aw_valid & prio_q.waiting_w)) begin - state_d = DECODE_W; - initiator_d[ccu_req_i.aw.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; - prio_d.waiting_r = ccu_req_i.ar_valid; - end else begin - state_d = IDLE; - end - end - - //--------------------- - //---- Read Branch ---- - //--------------------- - DECODE_R: begin - //check read transaction type - if (ccu_req_holder.ar.snoop == snoop_pkg::CLEAN_UNIQUE) begin // 
check if CleanUnique then send Invalidate - state_d = SEND_INVALID_R; - end else if (ccu_req_holder.ar.lock) begin // AMO LR, invalidate - state_d = SEND_INVALID_R; - end else begin - state_d = SEND_READ; - end - end - - SEND_INVALID_R: begin - // wait for all snoop masters to assert AC ready - if (ac_ready != '1) begin - state_d = SEND_INVALID_R; - end else begin - state_d = WAIT_INVALID_R; - end - end - - WAIT_INVALID_R: begin - // wait for all snoop masters to assert CR valid - if ((cr_valid == '1) && (ccu_req_i.r_ready || ccu_req_holder.ar.lock)) begin - if(|(data_available & ~response_error)) begin - state_d = SEND_AXI_REQ_WRITE_BACK_R; - end else begin - if (ccu_req_holder.ar.lock) begin // AMO LR, read memory - state_d = SEND_AXI_REQ_R; - end else begin - state_d = IDLE; - end - end - end else begin - state_d = WAIT_INVALID_R; - end - end - - SEND_AXI_REQ_WRITE_BACK_R: begin - // wait for responding slave to assert aw_ready - if(ccu_resp_i.aw_ready !='b1) begin - state_d = SEND_AXI_REQ_WRITE_BACK_R; - end else begin - state_d = WRITE_BACK_MEM_R; - end - end - - WRITE_BACK_MEM_R: begin - // wait for responding slave to send b_valid - if((ccu_resp_i.b_valid && ccu_req_o.b_ready)) begin - if (ccu_req_holder.ar.lock) begin // AMO LR, read memory - state_d = SEND_AXI_REQ_R; - end else begin - state_d = IDLE; - end - end else begin - state_d = WRITE_BACK_MEM_R; - end - end - - SEND_READ: begin - // wait for all snoop masters to de-assert AC ready - if (ac_ready != '1) begin - state_d = SEND_READ; - end else begin - state_d = WAIT_RESP_R; - end - end - - WAIT_RESP_R: begin - // wait for all snoop masters to assert CR valid - if (cr_valid != '1) begin - state_d = WAIT_RESP_R; - end else if(|(data_available & ~response_error)) begin - state_d = READ_SNP_DATA; - end else begin - state_d = SEND_AXI_REQ_R; - end - end - - READ_SNP_DATA: begin - if(cd_last == data_available && (r_eot == 1'b1 || (ccu_req_i.r_ready == 1'b1 && r_last == 1'b1))) begin - state_d = IDLE; - end 
else begin - state_d = READ_SNP_DATA; - end - end - - SEND_AXI_REQ_R: begin - // wait for responding slave to assert ar_ready - if(ccu_resp_i.ar_ready !='b1) begin - state_d = SEND_AXI_REQ_R; - end else begin - state_d = READ_MEM; - end - end - - READ_MEM: begin - // wait for responding slave to assert r_valid - if(ccu_resp_i.r_valid && ccu_req_i.r_ready) begin - if(ccu_resp_i.r.last) begin - state_d = IDLE; - end else begin - state_d = READ_MEM; - end - end else begin - state_d = READ_MEM; - end - end - - - //--------------------- - //---- Write Branch --- - //--------------------- - - DECODE_W: begin - state_d = SEND_INVALID_W; - end - - SEND_INVALID_W: begin - // wait for all snoop masters to assert AC ready - if (ac_ready != '1) begin - state_d = SEND_INVALID_W; - end else begin - state_d = WAIT_INVALID_W; - end - end - - WAIT_INVALID_W: begin - // wait for all snoop masters to assert CR valid - if (cr_valid != '1) begin - state_d = WAIT_INVALID_W; - end else if(|(data_available & ~response_error)) begin - state_d = SEND_AXI_REQ_WRITE_BACK_W; - end else begin - state_d = SEND_AXI_REQ_W; - end - end - - SEND_AXI_REQ_WRITE_BACK_W: begin - // wait for responding slave to assert aw_ready - if(ccu_resp_i.aw_ready !='b1) begin - state_d = SEND_AXI_REQ_WRITE_BACK_W; - end else begin - state_d = WRITE_BACK_MEM_W; - end - end - - WRITE_BACK_MEM_W: begin - // wait for responding slave to send b_valid - if((ccu_resp_i.b_valid && ccu_req_o.b_ready)) begin - state_d = SEND_AXI_REQ_W; - end else begin - state_d = WRITE_BACK_MEM_W; - end - end - - SEND_AXI_REQ_W: begin - // wait for responding slave to assert aw_ready - if(ccu_resp_i.aw_ready !='b1) begin - state_d = SEND_AXI_REQ_W; - end else begin - state_d = WRITE_MEM; - end - end - - WRITE_MEM: begin - // wait for responding slave to send b_valid - if((ccu_resp_i.b_valid && ccu_req_i.b_ready)) begin - if(ccu_req_holder.aw.atop [5]) begin - state_d = READ_MEM; - end else begin - state_d = IDLE; - end - end else begin - 
state_d = WRITE_MEM; - end - end - - default: state_d = IDLE; - - - endcase - end - - // ---------------------- - // Output Block - // ---------------------- - always_comb begin : ccu_output_block - logic ar_addr_offset; - - ar_addr_offset = ccu_req_holder.ar.addr[3]; - - // Default Assignments - ccu_req_o = '0; - ccu_resp_o = '0; - s2m_req_o = '0; - - case(state_q) - IDLE: begin - - end - - //--------------------- - //---- Read Branch ---- - //--------------------- - DECODE_R:begin - ccu_resp_o.ar_ready = 'b1; - end - SEND_READ: begin - // send request to snooping masters - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - s2m_req_o[n].ac.addr = ccu_req_holder.ar.addr; - s2m_req_o[n].ac.prot = ccu_req_holder.ar.prot; - s2m_req_o[n].ac.snoop = ccu_req_holder.ar.snoop; - s2m_req_o[n].ac_valid = !ac_ready[n]; - end - end - - SEND_INVALID_R:begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - s2m_req_o[n].ac.addr = ccu_req_holder.ar.addr; - s2m_req_o[n].ac.prot = ccu_req_holder.ar.prot; - s2m_req_o[n].ac.snoop = snoop_pkg::CLEAN_INVALID; - s2m_req_o[n].ac_valid = !ac_ready[n]; - end - end - - WAIT_RESP_R, WAIT_INVALID_W: begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - s2m_req_o[n].cr_ready = !cr_valid[n]; //'b1; - end - - WAIT_INVALID_R: begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - s2m_req_o[n].cr_ready = !cr_valid[n]; //'b1; - - if ((cr_valid == '1) && (!ccu_req_holder.ar.lock)) begin - ccu_resp_o.r = '0; - ccu_resp_o.r.id = ccu_req_holder.ar.id; - ccu_resp_o.r.last = 'b1; - ccu_resp_o.r_valid = 'b1; - end - end - - READ_SNP_DATA: begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - s2m_req_o[n].cd_ready = !cd_last[n] & data_available[n]; - // response to intiating master - if (!r_eot) begin - if (ccu_req_holder.ar.len == 0) begin - // single data request - logic critical_word_valid; - critical_word_valid = (stored_cd_data == ar_addr_offset + 1); - ccu_resp_o.r.data = cd_data[ar_addr_offset]; - 
ccu_resp_o.r.last = critical_word_valid; - ccu_resp_o.r_valid = critical_word_valid; - end else begin - // cache line request - ccu_resp_o.r.data = cd_data[r_last]; - ccu_resp_o.r.last = r_last; - ccu_resp_o.r_valid = |stored_cd_data; - end - ccu_resp_o.r.id = ccu_req_holder.ar.id; - ccu_resp_o.r.resp[3] = |shared; // update if shared - ccu_resp_o.r.resp[2] = |dirty; // update if any line dirty - end - end - - SEND_AXI_REQ_WRITE_BACK_R: begin - // send writeback request - ccu_req_o.aw_valid = 'b1; - ccu_req_o.aw = '0; //default - ccu_req_o.aw.addr = ccu_req_holder.ar.addr; - ccu_req_o.aw.addr[3:0] = 4'b0; // writeback is always full cache line - ccu_req_o.aw.size = 2'b11; - ccu_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction - ccu_req_o.aw.id = {first_responder, ccu_req_holder.ar.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. for AMO operations - ccu_req_o.aw.len = DcacheLineWords-1; - // WRITEBACK - ccu_req_o.aw.domain = 2'b00; - ccu_req_o.aw.snoop = 3'b011; - end - - WRITE_BACK_MEM_R: begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - s2m_req_o[n].cd_ready = !cd_last[n] & data_available[n]; - // write data to slave (RAM) - ccu_req_o.w_valid = |stored_cd_data; - ccu_req_o.w.strb = '1; - ccu_req_o.w.data = cd_data[w_last]; - ccu_req_o.w.last = w_last; - ccu_req_o.b_ready = 'b1; - end - - SEND_AXI_REQ_R: begin - // forward request to slave (RAM) - ccu_req_o.ar_valid = 'b1; - ccu_req_o.ar = ccu_req_holder.ar; - ccu_req_o.r_ready = ccu_req_holder.r_ready ; - end - - READ_MEM: begin - // indicate slave to send data on r channel - ccu_req_o.r_ready = ccu_req_i.r_ready ; - ccu_resp_o.r = ccu_resp_i.r; - ccu_resp_o.r_valid = ccu_resp_i.r_valid; - end - - //--------------------- - //---- Write Branch --- - //--------------------- - DECODE_W: begin - ccu_resp_o.aw_ready = 'b1; - end - - SEND_INVALID_W:begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - 
s2m_req_o[n].ac.addr = ccu_req_holder.aw.addr; - s2m_req_o[n].ac.prot = ccu_req_holder.aw.prot; - s2m_req_o[n].ac.snoop = snoop_pkg::CLEAN_INVALID; - s2m_req_o[n].ac_valid = !ac_ready[n]; - end - end - - SEND_AXI_REQ_WRITE_BACK_W: begin - // send writeback request - ccu_req_o.aw_valid = 'b1; - ccu_req_o.aw = '0; //default - ccu_req_o.aw.addr = ccu_req_holder.aw.addr; - ccu_req_o.aw.addr[3:0] = 4'b0; // writeback is always full cache line - ccu_req_o.aw.size = 2'b11; - ccu_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction - ccu_req_o.aw.id = {first_responder, ccu_req_holder.aw.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. for AMO operations - ccu_req_o.aw.len = DcacheLineWords-1; - // WRITEBACK - ccu_req_o.aw.domain = 2'b00; - ccu_req_o.aw.snoop = 3'b011; - end - - WRITE_BACK_MEM_W: begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - s2m_req_o[n].cd_ready = !cd_last[n] & data_available[n]; - // response to intiating master - if (!r_eot) begin - ccu_req_o.w_valid = |stored_cd_data; - ccu_req_o.w.strb = '1; - ccu_req_o.w.data = cd_data[w_last]; - ccu_req_o.w.last = w_last; - ccu_req_o.b_ready = 'b1; - end - end - - SEND_AXI_REQ_W: begin - // forward request to slave (RAM) - ccu_req_o.aw_valid = 'b1; - ccu_req_o.aw = ccu_req_holder.aw; - end - - WRITE_MEM: begin - ccu_req_o.w = ccu_req_i.w; - ccu_req_o.w_valid = ccu_req_i.w_valid; - ccu_req_o.b_ready = ccu_req_i.b_ready; - - ccu_resp_o.b = ccu_resp_i.b; - ccu_resp_o.b_valid = ccu_resp_i.b_valid; - ccu_resp_o.w_ready = ccu_resp_i.w_ready; - end - - endcase - end // end output block - - // Hold incoming ACE request - always_ff @(posedge clk_i , negedge rst_ni) begin - if(!rst_ni) begin - ccu_req_holder <= '0; - end else if(state_q == IDLE && - ((ccu_req_i.ar_valid & !ccu_req_i.aw_valid) | - (ccu_req_i.ar_valid & prio_q.waiting_r) | - (ccu_req_i.ar_valid & !prio_q.waiting_w))) begin - ccu_req_holder.ar <= ccu_req_i.ar; - 
ccu_req_holder.ar_valid <= ccu_req_i.ar_valid; - ccu_req_holder.r_ready <= ccu_req_i.r_ready; - - end else if(state_q == IDLE && - ((ccu_req_i.aw_valid & !ccu_req_i.ar_valid) | - (ccu_req_i.aw_valid & prio_q.waiting_w))) begin - ccu_req_holder.aw <= ccu_req_i.aw; - ccu_req_holder.aw_valid <= ccu_req_i.aw_valid; - end - end - - // Hold snoop AC_ready - always_ff @ (posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - ac_ready <= '0; - ac_valid <= '0; - end else if(state_q == DECODE_R || state_q == DECODE_W) begin - ac_ready <= initiator_q; - end else if(state_q == SEND_READ || state_q == SEND_INVALID_R || state_q == SEND_INVALID_W) begin - for (int i = 0; i < NoMstPorts; i = i + 1) begin - ac_ready[i] <= ac_ready[i] | (m2s_resp_i[i].ac_ready & s2m_req_o[i].ac_valid); - ac_valid[i] <= ac_valid[i] | (m2s_resp_i[i].ac_ready & s2m_req_o[i].ac_valid); - end - end else begin - ac_ready <= '0; - ac_valid <= '0; - end - end - - // Hold snoop CR - always_ff @ (posedge clk_i, negedge rst_ni) begin - logic snoop_resp_found; - if(!rst_ni) begin - cr_valid <= '0; - data_available <= '0; - shared <= '0; - dirty <= '0; - response_error <= '0; - first_responder <= '0; - snoop_resp_found <= 1'b0; - end else if(state_q == IDLE) begin - cr_valid <= '0; - data_available <= '0; - shared <= '0; - dirty <= '0; - response_error <= '0; - first_responder <= '0; - snoop_resp_found <= 1'b0; - end else if(state_q == SEND_READ || state_q == SEND_INVALID_R || state_q == SEND_INVALID_W) begin - cr_valid <= initiator_q; - end else begin - for (int i = 0; i < NoMstPorts; i = i + 1) begin - if(m2s_resp_i[i].cr_valid & s2m_req_o[i].cr_ready) begin - cr_valid[i] <= cr_valid[i] | 1'b1; - data_available[i] <= m2s_resp_i[i].cr_resp.dataTransfer; - shared[i] <= m2s_resp_i[i].cr_resp.isShared; - dirty[i] <= m2s_resp_i[i].cr_resp.passDirty; - response_error[i] <= m2s_resp_i[i].cr_resp.error; - end - end - if (!snoop_resp_found) begin - for (int i = 0; i < NoMstPorts; i = i + 1) begin - 
if(m2s_resp_i[i].cr_valid & s2m_req_o[i].cr_ready & m2s_resp_i[i].cr_resp.dataTransfer & !m2s_resp_i[i].cr_resp.error) begin - first_responder <= i[MstIdxBits-1:0]; - snoop_resp_found <= 1'b1; - break; - end - end - end - end - end - - // Hold snoop CD - always_ff @ (posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - data_received <= '0; - cd_last <= '0; - m2s_resp_holder <= '0; - cd_data <= '0; - stored_cd_data <= '0; - end else begin - if(state_q == IDLE) begin - data_received <= '0; - m2s_resp_holder <= '0; - cd_last <= '0; - cd_data <= '0; - stored_cd_data <= '0; - end - else begin - for (int i = 0; i < NoMstPorts; i = i + 1) begin - if (state_q == READ_SNP_DATA) begin - if(m2s_resp_i[i].cd_valid) begin - data_received[i] <= m2s_resp_i[i].cd_valid; - cd_last[i] <= cd_last[i] | (m2s_resp_i[i].cd.last & data_available[i]); - m2s_resp_holder[i] <= m2s_resp_i[i]; - end - if (data_received[i] & ccu_resp_o.r_valid) begin - data_received[i] <= '0; - m2s_resp_holder <= '0; - end - if (m2s_resp_i[first_responder].cd_valid & s2m_req_o[first_responder].cd_ready) begin - cd_data[m2s_resp_i[first_responder].cd.last] <= m2s_resp_i[first_responder].cd.data; - end - if (s2m_req_o[first_responder].cd_ready & m2s_resp_i[first_responder].cd_valid & !(ccu_resp_o.r_valid & ccu_req_i.r_ready)) begin - stored_cd_data <= stored_cd_data + 1; - end else if(ccu_resp_o.r_valid & ccu_req_i.r_ready & !(s2m_req_o[first_responder].cd_ready & m2s_resp_i[first_responder].cd_valid)) begin - stored_cd_data <= stored_cd_data - 1; - end - end else if (state_q == WRITE_BACK_MEM_R || state_q == WRITE_BACK_MEM_W) begin - if(m2s_resp_i[i].cd_valid) begin - data_received[i] <= m2s_resp_i[i].cd_valid; - cd_last[i] <= cd_last[i] | (m2s_resp_i[i].cd.last & data_available[i]); - m2s_resp_holder[i] <= m2s_resp_i[i]; - end - if (data_received[i] & ccu_req_o.w_valid) begin - data_received[i] <= '0; - m2s_resp_holder <= '0; - end - if (m2s_resp_i[first_responder].cd_valid & 
s2m_req_o[first_responder].cd_ready) begin - cd_data[m2s_resp_i[first_responder].cd.last] <= m2s_resp_i[first_responder].cd.data; - end - if (s2m_req_o[first_responder].cd_ready & m2s_resp_i[first_responder].cd_valid & !(ccu_req_o.w_valid & ccu_resp_i.w_ready)) begin - stored_cd_data <= stored_cd_data + 1; - end else if(ccu_req_o.w_valid & ccu_resp_i.w_ready & !(s2m_req_o[first_responder].cd_ready & m2s_resp_i[first_responder].cd_valid)) begin - stored_cd_data <= stored_cd_data - 1; - end - end - end - end - end - end - - always_ff @ (posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - r_last <= 1'b0; - r_eot <= 1'b0; - end else begin - if(state_q == IDLE) begin - r_last <= 1'b0; - r_eot <= 1'b0; - end else if (ccu_req_i.r_ready & ccu_resp_o.r_valid) begin - r_last <= !r_last; - if (ccu_resp_o.r.last) - r_eot <= 1'b1; - end - end - end - - always_ff @ (posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - w_last <= 1'b0; - w_eot <= 1'b0; - end else begin - if(state_q == IDLE) begin - w_last <= 1'b0; - w_eot <= 1'b0; - end else if (ccu_resp_i.w_ready & ccu_req_o.w_valid) begin - w_last <= !w_last; - if (w_last) - w_eot <= 1'b1; - end - end - end - - `ifndef VERILATOR - // pragma translate_off - initial begin - a_dcache_line_words : assert (DcacheLineWords == 2) else - $error("The ccu_fsm module is currently hardcoded to only support DcacheLineWidth = 2 * AxiDataWidth"); - end - // pragma translate_on - `endif - - - -endmodule From 096456d34befe3c4a8cb90fea04f387998cd57a6 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 12 Apr 2024 09:46:16 +0200 Subject: [PATCH 002/109] Define a single Legacy param and propagate it --- src/ccu_ctrl.sv | 4 ++++ src/ccu_ctrl_decoder.sv | 5 +---- src/ccu_ctrl_memory_unit.sv | 3 +-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index b7fcb8f..69b7c93 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -38,6 +38,8 @@ module ccu_ctrl import ccu_ctrl_pkg::*; input 
snoop_resp_t [NoMstPorts-1:0] m2s_resp_i ); +localparam bit Legacy = 1; + localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth; localparam int unsigned MstIdxBits = $clog2(NoMstPorts); @@ -79,6 +81,7 @@ for (genvar i = 0; i < NoMstPorts; i++) begin end ccu_ctrl_decoder #( + .Legacy (Legacy), .DcacheLineWidth (DcacheLineWidth), .AxiDataWidth (AxiDataWidth), .NoMstPorts (NoMstPorts), @@ -158,6 +161,7 @@ ccu_ctrl_snoop_unit #( ); ccu_ctrl_memory_unit #( + .Legacy (Legacy), .DcacheLineWidth (DcacheLineWidth), .AxiDataWidth (AxiDataWidth), .NoMstPorts (NoMstPorts), diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index 3dd6c68..fe727cf 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -16,6 +16,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; parameter type snoop_cd_t = logic, parameter type snoop_req_t = logic, parameter type snoop_resp_t = logic, + parameter bit Legacy = 1, localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth, localparam int unsigned MstIdxBits = $clog2(NoMstPorts) ) ( @@ -174,10 +175,6 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; assign first_responder_o = first_responder_q; - - localparam Legacy = 1; - - // ---------------------- // Current State Block // ---------------------- diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index fb89783..78aea79 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -16,6 +16,7 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; parameter type snoop_cd_t = logic, parameter type snoop_req_t = logic, parameter type snoop_resp_t = logic, + parameter bit Legacy = 1, localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth, localparam int unsigned MstIdxBits = $clog2(NoMstPorts) ) ( @@ -82,8 +83,6 @@ logic ar_valid_out, aw_valid_out; logic cd_data_incoming; -localparam Legacy = 1; - always_comb begin mu_ready_o = 1'b0; ax_state_d = ax_state_q; From 
38866adc6e317846dd17d97b57eca0393d292cb6 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 12 Apr 2024 17:01:17 +0200 Subject: [PATCH 003/109] Add AMO support and make fixes to memory unit --- src/ccu_ctrl_memory_unit.sv | 79 +++++++++++++++++++++++-------------- src/ccu_ctrl_pkg.sv | 5 ++- 2 files changed, 53 insertions(+), 31 deletions(-) diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index 78aea79..c365295 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -83,6 +83,9 @@ logic ar_valid_out, aw_valid_out; logic cd_data_incoming; +logic w_busy_d, w_busy_q; +logic w_last_d, w_last_q; + always_comb begin mu_ready_o = 1'b0; ax_state_d = ax_state_q; @@ -97,8 +100,11 @@ always_comb begin cd_data_incoming = 1'b0; + w_busy_d = w_busy_q; + case (ax_state_q) Ax_IDLE: begin + w_busy_d = 1'b0; mu_ready_o = 1'b1; if (mu_valid_i) begin sample_dec_data = 1'b1; @@ -109,16 +115,13 @@ always_comb begin Ax_BUSY: begin case (ax_op_q) SEND_AXI_REQ_R: begin - // If a lock is present, wait for W to complete - if (!ccu_req_holder_q.ar.lock || ccu_resp_i.w_ready) begin - ar_valid_out = 'b1; - ar_out = ccu_req_holder_q.ar; - if (ccu_resp_i.ar_ready) begin - if (Legacy) - ax_op_d = LEGACY_WAIT_READ; - else - ax_state_d = Ax_IDLE; - end + ar_valid_out = 'b1; + ar_out = ccu_req_holder_q.ar; + if (ccu_resp_i.ar_ready) begin + if (Legacy) + ax_op_d = AMO_WAIT_READ; + else + ax_state_d = Ax_IDLE; end end SEND_AXI_REQ_WRITE_BACK_R: begin @@ -136,19 +139,26 @@ always_comb begin aw_out.domain = 2'b00; aw_out.snoop = 3'b011; if (ccu_resp_i.aw_ready) begin - if (ccu_req_holder_q.ar.lock) - ax_op_d = SEND_AXI_REQ_R; - else if (Legacy) - ax_op_d = LEGACY_WAIT_WB; + if (Legacy) + ax_op_d = AMO_WAIT_WB_R; + else if (ccu_req_holder_q.ar.lock) + // Blocking behavior for AMO operations + // TODO: check if truly needed + ax_op_d = AMO_WAIT_WB_R; else ax_state_d = Ax_IDLE; end end SEND_AXI_REQ_W: begin + w_busy_d = 1'b1; aw_valid_out = 'b1; aw_out 
= ccu_req_holder_q.aw; if (ccu_resp_i.aw_ready) begin - if (Legacy) + if (ccu_req_holder_q.aw.atop[5]) + // Blocking behavior for AMO operations + // TODO: check if truly needed + ax_op_d = AMO_WAIT_READ; + else if (Legacy) ax_op_d = LEGACY_WAIT_WRITE; else ax_state_d = Ax_IDLE; @@ -170,22 +180,30 @@ always_comb begin aw_out.snoop = 3'b011; if (ccu_resp_i.aw_ready) begin if (Legacy) - ax_op_d = LEGACY_WAIT_WB; + ax_op_d = LEGACY_WAIT_WB_W; else - ax_state_d = Ax_IDLE; + ax_op_d = SEND_AXI_REQ_W; end end + AMO_WAIT_READ: begin + if(ccu_resp_i.r_valid && ccu_req_i.r_ready && ccu_resp_i.r.last) + ax_state_d = Ax_IDLE; + end LEGACY_WAIT_WRITE: begin if(ccu_resp_i.b_valid && ccu_req_i.b_ready) ax_state_d = Ax_IDLE; end - LEGACY_WAIT_READ: begin - if(ccu_resp_i.r_valid && ccu_req_i.r_ready && ccu_resp_i.r.last) - ax_state_d = Ax_IDLE; + AMO_WAIT_WB_R: begin + if(ccu_resp_i.b_valid && ccu_req_o.b_ready) + if (ccu_req_holder_q.ar.lock) begin + ax_op_d = SEND_AXI_REQ_R; + end else begin + ax_state_d = Ax_IDLE; + end end - LEGACY_WAIT_WB: begin + LEGACY_WAIT_WB_W: begin if(ccu_resp_i.b_valid && ccu_req_o.b_ready) - ax_state_d = Ax_IDLE; + ax_op_d = SEND_AXI_REQ_W; end endcase end @@ -197,23 +215,28 @@ logic [$clog2(DcacheLineWords)-1:0] fifo_usage; enum { FIFO_IDLE, FIFO_LOWER_HALF, FIFO_UPPER_HALF, FIFO_WAIT } fifo_state_q, fifo_state_d; -logic w_busy_d, w_busy_q; -logic w_last_d, w_last_q; - always_ff @(posedge clk_i or negedge rst_ni) begin if(!rst_ni) begin fifo_state_q <= FIFO_IDLE; - w_busy_q <= 1'b0; fifo_first_responder_q <= '0; w_last_q <= 1'b0; end else begin fifo_state_q <= fifo_state_d; - w_busy_q <= w_busy_d; fifo_first_responder_q <= fifo_first_responder_d; w_last_q <= w_last_d; end end +always_ff @(posedge clk_i or negedge rst_ni) begin + if(!rst_ni) begin + w_busy_q <= 1'b0; + end else if(ccu_resp_i.b_valid && ccu_req_o.b_ready) begin + w_busy_q <= 1'b0; + end else begin + w_busy_q <= w_busy_d; + end +end + logic fifo_push, fifo_flush, fifo_pop, fifo_full, 
fifo_empty; always_comb begin @@ -314,7 +337,6 @@ assign ccu_req_o.r_ready = ccu_req_i.r_ready; always_comb begin - w_busy_d = 1'b0; w_last_d = 1'b0; // W and B @@ -327,7 +349,6 @@ always_comb begin ccu_req_o.w.last = w_last_q; ccu_req_o.b_ready = 'b1; end else begin - w_busy_d = (ccu_req_i.w_valid && !ccu_resp_i.w_ready) || (w_busy_q && !(ccu_resp_i.b_valid && ccu_req_i.b_ready)); ccu_req_o.w = ccu_req_i.w; ccu_req_o.w_valid = ccu_req_i.w_valid; ccu_req_o.b_ready = ccu_req_i.b_ready; diff --git a/src/ccu_ctrl_pkg.sv b/src/ccu_ctrl_pkg.sv index ee25ef1..80b88db 100644 --- a/src/ccu_ctrl_pkg.sv +++ b/src/ccu_ctrl_pkg.sv @@ -5,9 +5,10 @@ package ccu_ctrl_pkg; SEND_AXI_REQ_WRITE_BACK_R, SEND_AXI_REQ_W, SEND_AXI_REQ_WRITE_BACK_W, + AMO_WAIT_READ, LEGACY_WAIT_WRITE, - LEGACY_WAIT_READ, - LEGACY_WAIT_WB + LEGACY_WAIT_WB_W, + AMO_WAIT_WB_R } mu_op_e; typedef enum logic { From 542f384120aa1401d5ab79060b80bf7c6cded324 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 15 Apr 2024 13:54:24 +0200 Subject: [PATCH 004/109] Simplify Ax state encoding --- src/ccu_ctrl_memory_unit.sv | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index c365295..45469e3 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -63,15 +63,15 @@ always_ff @(posedge clk_i , negedge rst_ni) begin end end -enum {Ax_IDLE, Ax_BUSY} ax_state_q, ax_state_d; +logic ax_busy_q, ax_busy_d; mu_op_e ax_op_q, ax_op_d; always_ff @(posedge clk_i , negedge rst_ni) begin if(!rst_ni) begin - ax_state_q <= Ax_IDLE; + ax_busy_q <= 1'b0; ax_op_q <= SEND_AXI_REQ_R; end else begin - ax_state_q <= ax_state_d; + ax_busy_q <= ax_busy_d; ax_op_q <= ax_op_d; end end @@ -88,7 +88,7 @@ logic w_last_d, w_last_q; always_comb begin mu_ready_o = 1'b0; - ax_state_d = ax_state_q; + ax_busy_d = ax_busy_q; ax_op_d = ax_op_q; sample_dec_data = 1'b0; @@ -102,17 +102,17 @@ always_comb begin w_busy_d = 
w_busy_q; - case (ax_state_q) - Ax_IDLE: begin + case (ax_busy_q) + 1'b0: begin w_busy_d = 1'b0; mu_ready_o = 1'b1; if (mu_valid_i) begin sample_dec_data = 1'b1; ax_op_d = mu_op_i; - ax_state_d = Ax_BUSY; + ax_busy_d = 1'b1; end end - Ax_BUSY: begin + 1'b1: begin case (ax_op_q) SEND_AXI_REQ_R: begin ar_valid_out = 'b1; @@ -121,7 +121,7 @@ always_comb begin if (Legacy) ax_op_d = AMO_WAIT_READ; else - ax_state_d = Ax_IDLE; + ax_busy_d = 1'b0; end end SEND_AXI_REQ_WRITE_BACK_R: begin @@ -146,7 +146,7 @@ always_comb begin // TODO: check if truly needed ax_op_d = AMO_WAIT_WB_R; else - ax_state_d = Ax_IDLE; + ax_busy_d = 1'b0; end end SEND_AXI_REQ_W: begin @@ -161,7 +161,7 @@ always_comb begin else if (Legacy) ax_op_d = LEGACY_WAIT_WRITE; else - ax_state_d = Ax_IDLE; + ax_busy_d = 1'b0; end end SEND_AXI_REQ_WRITE_BACK_W: begin @@ -187,18 +187,18 @@ always_comb begin end AMO_WAIT_READ: begin if(ccu_resp_i.r_valid && ccu_req_i.r_ready && ccu_resp_i.r.last) - ax_state_d = Ax_IDLE; + ax_busy_d = 1'b0; end LEGACY_WAIT_WRITE: begin if(ccu_resp_i.b_valid && ccu_req_i.b_ready) - ax_state_d = Ax_IDLE; + ax_busy_d = 1'b0; end AMO_WAIT_WB_R: begin if(ccu_resp_i.b_valid && ccu_req_o.b_ready) if (ccu_req_holder_q.ar.lock) begin ax_op_d = SEND_AXI_REQ_R; end else begin - ax_state_d = Ax_IDLE; + ax_busy_d = 1'b0; end end LEGACY_WAIT_WB_W: begin From f7960b583f2c2a4b1448f1bce345e90c4f199b7d Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 15 Apr 2024 15:58:08 +0200 Subject: [PATCH 005/109] Remove commented out code --- src/ccu_ctrl_snoop_unit.sv | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/ccu_ctrl_snoop_unit.sv b/src/ccu_ctrl_snoop_unit.sv index 173d066..2a6f0cb 100644 --- a/src/ccu_ctrl_snoop_unit.sv +++ b/src/ccu_ctrl_snoop_unit.sv @@ -171,17 +171,6 @@ always_comb begin r_o.last = 1'b1; // No further transactions if (!fifo_empty) begin - // // Single data request - // if (ccu_req_holder_q.ar.len == 0) begin - // // The upper 64 bit are required - // if 
(ar_addr_offset) begin - // r_valid_o = 1'b1; // There is something to send - // end - // end else begin - // // Full cacheline request - // r_valid_o = 1'b1; // There is something to send - // end - r_valid_o = 1'b1; if (r_ready_i) begin From 9c2f3851162bc1c254e2d297434ed242db40c9d8 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 17 Apr 2024 18:19:46 +0200 Subject: [PATCH 006/109] Move towards non-blocking implementation * Rework the memory unit to use AXI channels parallelism * Use AXI FIFO to buffer memory transactions * Several bug fixes * Remove Legacy support --- src/ccu_ctrl.sv | 4 - src/ccu_ctrl_decoder.sv | 9 +- src/ccu_ctrl_memory_unit.sv | 252 ++++++++++++++++++++++++------------ src/ccu_ctrl_pkg.sv | 5 +- src/ccu_ctrl_snoop_unit.sv | 7 +- 5 files changed, 174 insertions(+), 103 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 69b7c93..b7fcb8f 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -38,8 +38,6 @@ module ccu_ctrl import ccu_ctrl_pkg::*; input snoop_resp_t [NoMstPorts-1:0] m2s_resp_i ); -localparam bit Legacy = 1; - localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth; localparam int unsigned MstIdxBits = $clog2(NoMstPorts); @@ -81,7 +79,6 @@ for (genvar i = 0; i < NoMstPorts; i++) begin end ccu_ctrl_decoder #( - .Legacy (Legacy), .DcacheLineWidth (DcacheLineWidth), .AxiDataWidth (AxiDataWidth), .NoMstPorts (NoMstPorts), @@ -161,7 +158,6 @@ ccu_ctrl_snoop_unit #( ); ccu_ctrl_memory_unit #( - .Legacy (Legacy), .DcacheLineWidth (DcacheLineWidth), .AxiDataWidth (AxiDataWidth), .NoMstPorts (NoMstPorts), diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index fe727cf..0508628 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -16,7 +16,6 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; parameter type snoop_cd_t = logic, parameter type snoop_req_t = logic, parameter type snoop_resp_t = logic, - parameter bit Legacy = 1, localparam int unsigned DcacheLineWords = 
DcacheLineWidth / AxiDataWidth, localparam int unsigned MstIdxBits = $clog2(NoMstPorts) ) ( @@ -221,7 +220,6 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; initiator_d = '0; prio_d = '0; - if (!Legacy || (mu_ready_i && su_ready_i)) begin // wait for incoming valid request from master if(ccu_req_i.ar_valid & prio_r) begin decode_r = 1'b1; @@ -237,7 +235,6 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; slv_ar_ready_o = prio_r; slv_aw_ready_o = prio_w; - end end SEND_READ: begin @@ -311,6 +308,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; if (mu_ready_i && (ccu_req_holder_q.ar.lock || su_ready_i)) begin state_d = IDLE; + su_valid_o = !ccu_req_holder_q.ar.lock; end if(|(data_available_q & ~response_error_q)) begin @@ -322,10 +320,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; end end - if (cr_handshake_q == '1 && !ccu_req_holder_q.ar.lock) begin - su_op_o = SEND_INVALID_ACK_R; - su_valid_o = 1'b1; - end + su_op_o = SEND_INVALID_ACK_R; for (int unsigned n = 0; n < NoMstPorts; n = n + 1) s2m_req_o[n].cr_ready = !cr_handshake_q[n]; diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index 45469e3..1707267 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -16,7 +16,6 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; parameter type snoop_cd_t = logic, parameter type snoop_req_t = logic, parameter type snoop_resp_t = logic, - parameter bit Legacy = 1, localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth, localparam int unsigned MstIdxBits = $clog2(NoMstPorts) ) ( @@ -45,12 +44,17 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; localparam FIFO_DEPTH = 2; +mst_req_t ccu_req_out; +mst_resp_t ccu_resp_in; + mst_req_t ccu_req_holder_q; logic [MstIdxBits-1:0] first_responder_q, fifo_first_responder_q, fifo_first_responder_d; logic [NoMstPorts-1:0] data_available_q; logic sample_dec_data; +logic fifo_push, fifo_flush, fifo_pop, fifo_full, fifo_empty; + always_ff @(posedge clk_i , 
negedge rst_ni) begin if(!rst_ni) begin ccu_req_holder_q <= '0; @@ -83,9 +87,10 @@ logic ar_valid_out, aw_valid_out; logic cd_data_incoming; -logic w_busy_d, w_busy_q; logic w_last_d, w_last_q; +logic [$bits(ccu_resp_in.b.id)-1:0] wb_id_q, wb_id_d; + always_comb begin mu_ready_o = 1'b0; ax_busy_d = ax_busy_q; @@ -100,11 +105,10 @@ always_comb begin cd_data_incoming = 1'b0; - w_busy_d = w_busy_q; + wb_id_d = wb_id_q; case (ax_busy_q) 1'b0: begin - w_busy_d = 1'b0; mu_ready_o = 1'b1; if (mu_valid_i) begin sample_dec_data = 1'b1; @@ -117,17 +121,15 @@ always_comb begin SEND_AXI_REQ_R: begin ar_valid_out = 'b1; ar_out = ccu_req_holder_q.ar; - if (ccu_resp_i.ar_ready) begin - if (Legacy) - ax_op_d = AMO_WAIT_READ; - else - ax_busy_d = 1'b0; + if (ccu_resp_in.ar_ready) begin + ax_busy_d = 1'b0; end end SEND_AXI_REQ_WRITE_BACK_R: begin + wb_id_d = {first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}; cd_data_incoming = 1'b1; // send writeback request - aw_valid_out = 'b1; + aw_valid_out = fifo_empty; aw_out = '0; //default aw_out.addr = ccu_req_holder_q.ar.addr; aw_out.addr[3:0] = 4'b0; // writeback is always full cache line @@ -138,36 +140,34 @@ always_comb begin // WRITEBACK aw_out.domain = 2'b00; aw_out.snoop = 3'b011; - if (ccu_resp_i.aw_ready) begin - if (Legacy) - ax_op_d = AMO_WAIT_WB_R; - else if (ccu_req_holder_q.ar.lock) + if (ccu_resp_in.aw_ready && fifo_empty) begin + if (ccu_req_holder_q.ar.lock) // Blocking behavior for AMO operations // TODO: check if truly needed ax_op_d = AMO_WAIT_WB_R; else - ax_busy_d = 1'b0; + ax_busy_d = !ccu_resp_in.w_ready; end end SEND_AXI_REQ_W: begin - w_busy_d = 1'b1; - aw_valid_out = 'b1; - aw_out = ccu_req_holder_q.aw; - if (ccu_resp_i.aw_ready) begin - if (ccu_req_holder_q.aw.atop[5]) - // Blocking behavior for AMO operations - // TODO: check if truly needed - ax_op_d = AMO_WAIT_READ; - else if (Legacy) - ax_op_d = LEGACY_WAIT_WRITE; - else - ax_busy_d = 1'b0; + if (wb_id_q != ccu_req_holder_q.aw.id || !cd_busy_o) 
begin + aw_valid_out = 'b1; + aw_out = ccu_req_holder_q.aw; + if (ccu_resp_in.aw_ready) begin + if (ccu_req_holder_q.aw.atop[5]) + // Blocking behavior for AMO operations + // TODO: check if truly needed + ax_op_d = AMO_WAIT_READ; + else + ax_busy_d = !ccu_resp_in.w_ready; + end end end SEND_AXI_REQ_WRITE_BACK_W: begin + wb_id_d = {first_responder_q, ccu_req_holder_q.aw.id[SlvAxiIDWidth-1:0]}; cd_data_incoming = 1'b1; // send writeback request - aw_valid_out = 'b1; + aw_valid_out = fifo_empty; aw_out = '0; //default aw_out.addr = ccu_req_holder_q.aw.addr; aw_out.addr[3:0] = 4'b0; // writeback is always full cache line @@ -178,31 +178,26 @@ always_comb begin // WRITEBACK aw_out.domain = 2'b00; aw_out.snoop = 3'b011; - if (ccu_resp_i.aw_ready) begin - if (Legacy) - ax_op_d = LEGACY_WAIT_WB_W; + if (ccu_resp_in.aw_ready && fifo_empty) begin + if (ccu_req_holder_q.aw.atop[5]) + ax_op_d = AMO_WAIT_WB_W; else ax_op_d = SEND_AXI_REQ_W; end end AMO_WAIT_READ: begin - if(ccu_resp_i.r_valid && ccu_req_i.r_ready && ccu_resp_i.r.last) - ax_busy_d = 1'b0; - end - LEGACY_WAIT_WRITE: begin - if(ccu_resp_i.b_valid && ccu_req_i.b_ready) + if(ccu_resp_in.r_valid && ccu_req_i.r_ready && ccu_resp_in.r.last + && ccu_resp_in.r.id == ccu_req_holder_q.aw.id) ax_busy_d = 1'b0; end AMO_WAIT_WB_R: begin - if(ccu_resp_i.b_valid && ccu_req_o.b_ready) - if (ccu_req_holder_q.ar.lock) begin - ax_op_d = SEND_AXI_REQ_R; - end else begin - ax_busy_d = 1'b0; - end + if(ccu_resp_in.b_valid && ccu_req_out.b_ready + && ccu_resp_in.b.id == {first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}) + ax_op_d = SEND_AXI_REQ_R; end - LEGACY_WAIT_WB_W: begin - if(ccu_resp_i.b_valid && ccu_req_o.b_ready) + AMO_WAIT_WB_W: begin + if(ccu_resp_in.b_valid && ccu_req_out.b_ready && + ccu_resp_in.b.id == {first_responder_q, ccu_req_holder_q.aw.id[SlvAxiIDWidth-1:0]}) ax_op_d = SEND_AXI_REQ_W; end endcase @@ -210,6 +205,10 @@ always_comb begin endcase end +typedef enum logic [1:0] {W_IDLE, W_PASSTHROUGH, 
W_FROM_FIFO_W, W_FROM_FIFO_R} w_state_t; + +w_state_t w_state_q, w_state_d; + logic [AxiDataWidth-1:0] fifo_data_in, fifo_data_out; logic [$clog2(DcacheLineWords)-1:0] fifo_usage; @@ -219,26 +218,12 @@ always_ff @(posedge clk_i or negedge rst_ni) begin if(!rst_ni) begin fifo_state_q <= FIFO_IDLE; fifo_first_responder_q <= '0; - w_last_q <= 1'b0; end else begin fifo_state_q <= fifo_state_d; fifo_first_responder_q <= fifo_first_responder_d; - w_last_q <= w_last_d; - end -end - -always_ff @(posedge clk_i or negedge rst_ni) begin - if(!rst_ni) begin - w_busy_q <= 1'b0; - end else if(ccu_resp_i.b_valid && ccu_req_o.b_ready) begin - w_busy_q <= 1'b0; - end else begin - w_busy_q <= w_busy_d; end end -logic fifo_push, fifo_flush, fifo_pop, fifo_full, fifo_empty; - always_comb begin fifo_state_d = fifo_state_q; fifo_first_responder_d = fifo_first_responder_q; @@ -261,7 +246,7 @@ always_comb begin end end FIFO_WAIT: begin - if (ccu_resp_i.b_valid && ccu_req_o.b_ready) + if (ccu_resp_in.b_valid && ccu_req_out.b_ready && ccu_resp_in.b.id == wb_id_q) fifo_state_d = FIFO_IDLE; end endcase @@ -270,9 +255,9 @@ end assign cd_busy_o = fifo_state_q != FIFO_IDLE; assign fifo_push = cd_busy_o && cd_valid_i[fifo_first_responder_q] && cd_ready_o[fifo_first_responder_q]; -assign fifo_flush = !cd_busy_o; +assign fifo_flush = fifo_state_q == FIFO_IDLE; assign fifo_data_in = cd_i[fifo_first_responder_q].data; -assign fifo_pop = w_busy_q ? '0 : ccu_resp_i.w_ready && ccu_req_o.w_valid; +assign fifo_pop = w_state_q inside {W_FROM_FIFO_W, W_FROM_FIFO_R} ? 
ccu_resp_in.w_ready && ccu_req_out.w_valid : '0; fifo_v3 #( @@ -323,40 +308,135 @@ always_comb begin end // AR -assign ccu_req_o.ar = ar_out; -assign ccu_req_o.ar_valid = ar_valid_out; +assign ccu_req_out.ar = ar_out; +assign ccu_req_out.ar_valid = ar_valid_out; // AW -assign ccu_req_o.aw = aw_out; -assign ccu_req_o.aw_valid = aw_valid_out; +assign ccu_req_out.aw = aw_out; +assign ccu_req_out.aw_valid = aw_valid_out; // R passthrough -assign ccu_resp_o.r = ccu_resp_i.r; -assign ccu_resp_o.r_valid = ccu_resp_i.r_valid; -assign ccu_req_o.r_ready = ccu_req_i.r_ready; +assign ccu_resp_o.r = ccu_resp_in.r; +assign ccu_resp_o.r_valid = ccu_resp_in.r_valid; +assign ccu_req_out.r_ready = ccu_req_i.r_ready; -always_comb begin +// W and B - w_last_d = 1'b0; - - // W and B - // Connect the FIFO as long as the transmission is ongoing - if (cd_busy_o && !w_busy_q) begin - w_last_d = ccu_resp_i.w_ready && !fifo_empty; - ccu_req_o.w_valid = !fifo_empty; - ccu_req_o.w.strb = '1; - ccu_req_o.w.data = fifo_data_out; - ccu_req_o.w.last = w_last_q; - ccu_req_o.b_ready = 'b1; +always_ff @(posedge clk_i or negedge rst_ni) begin + if(!rst_ni) begin + w_state_q <= W_IDLE; + w_last_q <= 1'b0; end else begin - ccu_req_o.w = ccu_req_i.w; - ccu_req_o.w_valid = ccu_req_i.w_valid; - ccu_req_o.b_ready = ccu_req_i.b_ready; + w_state_q <= w_state_d; + w_last_q <= w_last_d; + end +end + +logic wb_expected_q; - ccu_resp_o.b = ccu_resp_i.b; - ccu_resp_o.b_valid = ccu_resp_i.b_valid; - ccu_resp_o.w_ready = ccu_resp_i.w_ready; +always_ff @(posedge clk_i or negedge rst_ni) begin + if(!rst_ni) begin + wb_expected_q <= 1'b0; + wb_id_q <= '0; + end else if(ccu_resp_in.b_valid && + ccu_req_out.b_ready && + ccu_resp_in.b.id == wb_id_q) begin + wb_expected_q <= 1'b0; + wb_id_q <= '0; + end else if(cd_data_incoming) begin + wb_expected_q <= 1'b1; + wb_id_q <= wb_id_d; end end + + +always_comb begin + w_last_d = w_last_q; + w_state_d = w_state_q; + + ccu_req_out.w = ccu_req_i.w; + ccu_req_out.w_valid = 1'b0; + 
ccu_resp_o.w_ready = 1'b0; + + case (w_state_q) + W_IDLE: begin + w_last_d = 1'b0; + if (ax_busy_q && ccu_req_out.aw_valid) begin + case (ax_op_q) + SEND_AXI_REQ_WRITE_BACK_W: begin + w_state_d = W_FROM_FIFO_W; + end + SEND_AXI_REQ_WRITE_BACK_R: + w_state_d = W_FROM_FIFO_R; + SEND_AXI_REQ_W: begin + w_state_d = W_PASSTHROUGH; + end + default: + w_state_d = W_IDLE; + endcase + end + end + W_PASSTHROUGH: begin + ccu_req_out.w_valid = ccu_req_i.w_valid; + ccu_resp_o.w_ready = ccu_resp_in.w_ready; + + if(ccu_resp_in.w_ready && ccu_req_i.w_valid && ccu_req_i.w.last) + w_state_d = W_IDLE; + end + W_FROM_FIFO_R, W_FROM_FIFO_W: begin + // Connect the FIFO as long as the transmission is ongoing + w_last_d = ccu_resp_in.w_ready && !fifo_empty; + ccu_req_out.w_valid = !fifo_empty; + ccu_req_out.w.strb = '1; + ccu_req_out.w.data = fifo_data_out; + ccu_req_out.w.last = w_last_q; + + if(ccu_resp_in.w_ready && !fifo_empty && w_last_q) + if (w_state_q == W_FROM_FIFO_W) begin + w_state_d = ax_busy_q && ax_op_q == AMO_WAIT_WB_W ? 
W_IDLE : W_PASSTHROUGH; + end else begin + w_state_d = W_IDLE; + end + end + endcase +end + +assign ccu_resp_o.b = ccu_resp_in.b; + +always_comb begin + ccu_req_out.b_ready = 1'b0; + ccu_resp_o.b_valid = 1'b0; + + if (wb_expected_q && ccu_resp_in.b.id == wb_id_q) begin + ccu_req_out.b_ready = 'b1; + end else begin + ccu_req_out.b_ready = ccu_req_i.b_ready; + ccu_resp_o.b_valid = ccu_resp_in.b_valid; + end +end + + +axi_fifo #( + .Depth (4), + .aw_chan_t (mst_aw_chan_t), + .w_chan_t (w_chan_t), + .b_chan_t (mst_b_chan_t), + .ar_chan_t (mst_ar_chan_t), + .r_chan_t (mst_r_chan_t), + .axi_req_t (mst_req_t), + .axi_resp_t(mst_resp_t) +) fifo_to_from_mem_i ( + .clk_i, + .rst_ni, + .test_i (1'b0), + // slave port + .slv_req_i (ccu_req_out), + .slv_resp_o (ccu_resp_in), + // master port + .mst_req_o (ccu_req_o), + .mst_resp_i (ccu_resp_i) +); + + endmodule \ No newline at end of file diff --git a/src/ccu_ctrl_pkg.sv b/src/ccu_ctrl_pkg.sv index 80b88db..a9c529e 100644 --- a/src/ccu_ctrl_pkg.sv +++ b/src/ccu_ctrl_pkg.sv @@ -6,9 +6,8 @@ package ccu_ctrl_pkg; SEND_AXI_REQ_W, SEND_AXI_REQ_WRITE_BACK_W, AMO_WAIT_READ, - LEGACY_WAIT_WRITE, - LEGACY_WAIT_WB_W, - AMO_WAIT_WB_R + AMO_WAIT_WB_R, + AMO_WAIT_WB_W } mu_op_e; typedef enum logic { diff --git a/src/ccu_ctrl_snoop_unit.sv b/src/ccu_ctrl_snoop_unit.sv index 2a6f0cb..a923471 100644 --- a/src/ccu_ctrl_snoop_unit.sv +++ b/src/ccu_ctrl_snoop_unit.sv @@ -117,8 +117,10 @@ always_comb begin r_o.id = ccu_req_holder_i.ar.id; r_o.last = 'b1; r_valid_o = 'b1; - if (!r_ready_i) + if (!r_ready_i) begin state_d = WAIT_R_READY; + sample_dec_data = 1'b1; + end end else if (su_op_i == READ_SNP_DATA) begin sample_dec_data = 1'b1; state_d = SEND_LOWER_HALF; @@ -181,9 +183,8 @@ always_comb begin end WAIT_R_READY: begin - r_o = '0; - r_o.id = ccu_req_holder_i.ar.id; + r_o.id = ccu_req_holder_q.ar.id; r_o.last = 'b1; r_valid_o = 'b1; From f46f1ab577e33859710a1f6f8e8af0b76d5c1cb0 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 17 Apr 
2024 18:28:57 +0200 Subject: [PATCH 007/109] Add some relevant comments and TODOs --- src/ccu_ctrl_memory_unit.sv | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index 1707267..7767e4b 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -150,6 +150,10 @@ always_comb begin end end SEND_AXI_REQ_W: begin + // This is a hotfix to avoid serving requests from the core + // with the same ID of the writeback + // TODO: add a bit to the ID to differentiate between WB issued + // by the CCU and requests forwarded from the cores if (wb_id_q != ccu_req_holder_q.aw.id || !cd_busy_o) begin aw_valid_out = 'b1; aw_out = ccu_req_holder_q.aw; @@ -246,6 +250,7 @@ always_comb begin end end FIFO_WAIT: begin + // TODO: rework to get rid of this state if (ccu_resp_in.b_valid && ccu_req_out.b_ready && ccu_resp_in.b.id == wb_id_q) fifo_state_d = FIFO_IDLE; end @@ -394,6 +399,10 @@ always_comb begin if(ccu_resp_in.w_ready && !fifo_empty && w_last_q) if (w_state_q == W_FROM_FIFO_W) begin + // This checks is just to ensure that the cores have visibility + // on the W channel only when we actually want to write something + // Removing it would cause a premature forwarding of a W req + // TODO: make this less convoluted w_state_d = ax_busy_q && ax_op_q == AMO_WAIT_WB_W ? 
W_IDLE : W_PASSTHROUGH; end else begin w_state_d = W_IDLE; From 8d29df1bda02c157d3ce8fc772ba9c4aecb015c0 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 18 Apr 2024 16:06:54 +0200 Subject: [PATCH 008/109] Add collision checks and fix memory CD FIFO handling * Checks are currently performed by stalling requests targeted at the same set --- src/ccu_ctrl.sv | 145 +++++++++++++++++++++++++++++++++++- src/ccu_ctrl_decoder.sv | 34 +++++++-- src/ccu_ctrl_memory_unit.sv | 36 ++++----- 3 files changed, 188 insertions(+), 27 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index b7fcb8f..09e39dd 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -38,8 +38,13 @@ module ccu_ctrl import ccu_ctrl_pkg::*; input snoop_resp_t [NoMstPorts-1:0] m2s_resp_i ); -localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth; -localparam int unsigned MstIdxBits = $clog2(NoMstPorts); +import axi_pkg::*; +import ariane_pkg::*; + +localparam int unsigned AxiAddrWidth = 64; +localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth; +localparam int unsigned DCacheByteOffset = $clog2(ariane_pkg::DCACHE_LINE_WIDTH/8); +localparam int unsigned MstIdxBits = $clog2(NoMstPorts); mst_resp_t mu_ccu_resp; @@ -78,6 +83,8 @@ for (genvar i = 0; i < NoMstPorts; i++) begin assign cd_valid[i] = m2s_resp_i[i].cd_valid; end +logic dec_lookup_req, dec_collision; + ccu_ctrl_decoder #( .DcacheLineWidth (DcacheLineWidth), .AxiDataWidth (AxiDataWidth), @@ -117,7 +124,10 @@ ccu_ctrl_decoder #( .shared_o (dec_shared), .dirty_o (dec_dirty), .data_available_o (data_available), - .first_responder_o (dec_first_responder) + .first_responder_o (dec_first_responder), + + .lookup_req_o (dec_lookup_req), + .collision_i (dec_collision) ); ccu_ctrl_snoop_unit #( @@ -253,4 +263,133 @@ always_comb begin end end +logic [SlvAxiIDWidth:0] b_inp_id; +logic [AxiAddrWidth-1:0] b_inp_data; +logic b_inp_req; +logic b_inp_gnt; + +logic [AxiAddrWidth-1:0] b_exists_data; +logic 
[AxiAddrWidth-1:0] b_exists_mask; +logic b_exists_req; +logic b_exists; +logic b_exists_gnt; + +logic [SlvAxiIDWidth:0] b_oup_id; +logic b_oup_pop; +logic b_oup_req; +logic [AxiAddrWidth-1:0] b_oup_data; +logic b_oup_data_valid; +logic b_oup_gnt; + +logic [SlvAxiIDWidth :0] r_inp_id; +logic [AxiAddrWidth-1:0] r_inp_data; +logic r_inp_req; +logic r_inp_gnt; + +logic [AxiAddrWidth-1:0] r_exists_data; +logic [AxiAddrWidth-1:0] r_exists_mask; +logic r_exists_req; +logic r_exists; +logic r_exists_gnt; + +logic [SlvAxiIDWidth:0] r_oup_id; +logic r_oup_pop; +logic r_oup_req; +logic [AxiAddrWidth-1:0] r_oup_data; +logic r_oup_data_valid; +logic r_oup_gnt; + +// Exists +assign dec_collision = (b_exists || r_exists); + +// _gnt is not used as it is combinationally set when req = 1 + + +assign b_exists_data = axi_pkg::aligned_addr(dec_ccu_req_holder.aw.addr,dec_ccu_req_holder.aw.size); +assign b_exists_mask = {ariane_pkg::DCACHE_INDEX_WIDTH{1'b1}} << DCacheByteOffset; +assign b_exists_req = dec_lookup_req; + +assign r_exists_data = axi_pkg::aligned_addr(dec_ccu_req_holder.ar.addr,dec_ccu_req_holder.ar.size); +assign r_exists_mask = {ariane_pkg::DCACHE_INDEX_WIDTH{1'b1}} << DCacheByteOffset; +assign r_exists_req = dec_lookup_req; + +// Oup +assign b_oup_id = ccu_resp_o.b.id; +assign b_oup_pop = 1'b1; +assign b_oup_req = ccu_resp_o.b_valid && ccu_req_i.b_ready; + +assign r_oup_id = ccu_resp_o.r.id; +assign r_oup_pop = 1'b1; +assign r_oup_req = ccu_resp_o.r_valid && ccu_req_i.r_ready && ccu_resp_o.r.last; + +// _data_* not used +// _gnt is not used as it is combinationally set when req = 1 + +// Inp +assign b_inp_id = ccu_req_i.aw.id; +assign b_inp_data = axi_pkg::aligned_addr(ccu_req_i.aw.addr,ccu_req_i.aw.size); +assign b_inp_req = ccu_req_i.aw_valid && ccu_resp_o.aw_ready; + +assign r_inp_id = ccu_req_i.ar.id; +assign r_inp_data = axi_pkg::aligned_addr(ccu_req_i.ar.addr,ccu_req_i.ar.size); +assign r_inp_req = ccu_req_i.ar_valid && ccu_resp_o.ar_ready; + + +typedef logic 
[AxiAddrWidth-1:0] id_queue_data_t; + +id_queue #( + .ID_WIDTH (SlvAxiIDWidth+1), + .CAPACITY (16), + .FULL_BW (1), + .data_t (id_queue_data_t) +) b_id_queue ( + .clk_i, + .rst_ni, + + .inp_id_i (b_inp_id), + .inp_data_i (b_inp_data), + .inp_req_i (b_inp_req), + .inp_gnt_o (b_inp_gnt), + + .exists_data_i (b_exists_data), + .exists_mask_i (b_exists_mask), + .exists_req_i (b_exists_req), + .exists_o (b_exists), + .exists_gnt_o (b_exists_gnt), + + .oup_id_i (b_oup_id), + .oup_pop_i (b_oup_pop), + .oup_req_i (b_oup_req), + .oup_data_o (b_oup_data), + .oup_data_valid_o (b_oup_data_valid), + .oup_gnt_o (b_oup_gnt) +); + +id_queue #( + .ID_WIDTH (SlvAxiIDWidth+1), + .CAPACITY (16), + .FULL_BW (1), + .data_t (id_queue_data_t) +) r_id_queue ( + .clk_i, + .rst_ni, + + .inp_id_i (r_inp_id), + .inp_data_i (r_inp_data), + .inp_req_i (r_inp_req), + .inp_gnt_o (r_inp_gnt), + + .exists_data_i (r_exists_data), + .exists_mask_i (r_exists_mask), + .exists_req_i (r_exists_req), + .exists_o (r_exists), + .exists_gnt_o (r_exists_gnt), + + .oup_id_i (r_oup_id), + .oup_pop_i (r_oup_pop), + .oup_req_i (r_oup_req), + .oup_data_o (r_oup_data), + .oup_data_valid_o (r_oup_data_valid), + .oup_gnt_o (r_oup_gnt) +); endmodule diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index 0508628..7514f21 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -43,7 +43,10 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; output logic shared_o, output logic dirty_o, output logic [NoMstPorts-1:0] data_available_o, - output logic [MstIdxBits-1:0] first_responder_o + output logic [MstIdxBits-1:0] first_responder_o, + + output logic lookup_req_o, + input logic collision_i ); logic [NoMstPorts-1:0] initiator_d, initiator_q; @@ -52,6 +55,8 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; enum { IDLE, + DECODE_R, + DECODE_W, SEND_READ, SEND_INVALID_R, SEND_INVALID_W, @@ -76,7 +81,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; logic send_invalid_r; - assign send_invalid_r = 
ccu_req_i.ar.snoop == snoop_pkg::CLEAN_UNIQUE || ccu_req_i.ar.lock; + assign send_invalid_r = ccu_req_holder_q.ar.snoop == snoop_pkg::CLEAN_UNIQUE || ccu_req_holder_q.ar.lock; for (genvar i = 0; i < NoMstPorts; i = i + 1) begin assign ac_handshake[i] = m2s_resp_i[i].ac_ready & s2m_req_o[i].ac_valid; @@ -106,7 +111,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; always_ff @ (posedge clk_i, negedge rst_ni) begin if(!rst_ni) begin ac_handshake_q[i] <= '0; - end else if(decode_r || decode_w) begin + end else if(state_q inside {DECODE_R, DECODE_W}) begin ac_handshake_q[i] <= initiator_d[i]; end else if(state_q inside {SEND_READ, SEND_INVALID_R, SEND_INVALID_W}) begin if (ac_handshake[i]) @@ -215,6 +220,8 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; decode_r = 1'b0; decode_w = 1'b0; + lookup_req_o = 1'b0; + case (state_q) IDLE: begin @@ -223,18 +230,31 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; // wait for incoming valid request from master if(ccu_req_i.ar_valid & prio_r) begin decode_r = 1'b1; - state_d = send_invalid_r ? SEND_INVALID_R : SEND_READ; + state_d = DECODE_R; initiator_d[ccu_req_i.ar.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; prio_d.waiting_w = ccu_req_i.aw_valid; end else if(ccu_req_i.aw_valid & prio_w) begin decode_w = 1'b1; - state_d = SEND_INVALID_W; + state_d = DECODE_W; initiator_d[ccu_req_i.aw.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; prio_d.waiting_r = ccu_req_i.ar_valid; end + end - slv_ar_ready_o = prio_r; - slv_aw_ready_o = prio_w; + DECODE_W: begin + lookup_req_o = 1'b1; + if (!collision_i) begin + state_d = SEND_INVALID_W; + slv_aw_ready_o = 1'b1; + end + end + + DECODE_R: begin + lookup_req_o = 1'b1; + if (!collision_i) begin + state_d = send_invalid_r ? 
SEND_INVALID_R : SEND_READ; + slv_ar_ready_o = 1'b1; + end end SEND_READ: begin diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index 7767e4b..1884ef7 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -91,6 +91,8 @@ logic w_last_d, w_last_q; logic [$bits(ccu_resp_in.b.id)-1:0] wb_id_q, wb_id_d; +logic wb_expected_q; + always_comb begin mu_ready_o = 1'b0; ax_busy_d = ax_busy_q; @@ -146,7 +148,7 @@ always_comb begin // TODO: check if truly needed ax_op_d = AMO_WAIT_WB_R; else - ax_busy_d = !ccu_resp_in.w_ready; + ax_busy_d = 1'b0; end end SEND_AXI_REQ_W: begin @@ -154,7 +156,7 @@ always_comb begin // with the same ID of the writeback // TODO: add a bit to the ID to differentiate between WB issued // by the CCU and requests forwarded from the cores - if (wb_id_q != ccu_req_holder_q.aw.id || !cd_busy_o) begin + if (wb_id_q != ccu_req_holder_q.aw.id || !wb_expected_q) begin aw_valid_out = 'b1; aw_out = ccu_req_holder_q.aw; if (ccu_resp_in.aw_ready) begin @@ -163,7 +165,7 @@ always_comb begin // TODO: check if truly needed ax_op_d = AMO_WAIT_READ; else - ax_busy_d = !ccu_resp_in.w_ready; + ax_busy_d = 1'b0; end end end @@ -216,7 +218,7 @@ w_state_t w_state_q, w_state_d; logic [AxiDataWidth-1:0] fifo_data_in, fifo_data_out; logic [$clog2(DcacheLineWords)-1:0] fifo_usage; -enum { FIFO_IDLE, FIFO_LOWER_HALF, FIFO_UPPER_HALF, FIFO_WAIT } fifo_state_q, fifo_state_d; +enum { FIFO_IDLE, FIFO_LOWER_HALF, FIFO_UPPER_HALF, FIFO_WAIT_LAST_CD } fifo_state_q, fifo_state_d; always_ff @(posedge clk_i or negedge rst_ni) begin if(!rst_ni) begin @@ -228,10 +230,14 @@ always_ff @(posedge clk_i or negedge rst_ni) begin end end +logic [NoMstPorts-1:0] cd_last_q; + always_comb begin fifo_state_d = fifo_state_q; fifo_first_responder_d = fifo_first_responder_q; + fifo_push = 1'b0; + case (fifo_state_q) FIFO_IDLE: begin if (cd_data_incoming) begin @@ -241,26 +247,26 @@ always_comb begin end FIFO_LOWER_HALF: begin 
if(cd_valid_i[fifo_first_responder_q] && cd_ready_o[fifo_first_responder_q]) begin + fifo_push = 1'b1; fifo_state_d = FIFO_UPPER_HALF; end end FIFO_UPPER_HALF: begin if(cd_valid_i[fifo_first_responder_q] && cd_ready_o[fifo_first_responder_q]) begin - fifo_state_d = FIFO_WAIT; + fifo_push = 1'b1; + fifo_state_d = cd_last_q == data_available_q ? FIFO_IDLE : FIFO_WAIT_LAST_CD; end end - FIFO_WAIT: begin - // TODO: rework to get rid of this state - if (ccu_resp_in.b_valid && ccu_req_out.b_ready && ccu_resp_in.b.id == wb_id_q) + FIFO_WAIT_LAST_CD: begin + if (cd_last_q == data_available_q) fifo_state_d = FIFO_IDLE; end endcase end -assign cd_busy_o = fifo_state_q != FIFO_IDLE; -assign fifo_push = cd_busy_o && cd_valid_i[fifo_first_responder_q] && cd_ready_o[fifo_first_responder_q]; -assign fifo_flush = fifo_state_q == FIFO_IDLE; +assign cd_busy_o = cd_last_q != data_available_q; +assign fifo_flush = 1'b0; assign fifo_data_in = cd_i[fifo_first_responder_q].data; assign fifo_pop = w_state_q inside {W_FROM_FIFO_W, W_FROM_FIFO_R} ? ccu_resp_in.w_ready && ccu_req_out.w_valid : '0; @@ -283,13 +289,11 @@ assign fifo_pop = w_state_q inside {W_FROM_FIFO_W, W_FROM_FIFO_R} ? 
ccu_resp .pop_i (fifo_pop) ); -logic [NoMstPorts-1:0] cd_last_q; - for (genvar i = 0; i < NoMstPorts; i = i + 1) begin always_ff @ (posedge clk_i, negedge rst_ni) begin if(!rst_ni) begin cd_last_q[i] <= '0; - end else if(!cd_busy_o) begin + end else if(fifo_state_q == FIFO_IDLE) begin cd_last_q[i] <= '0; end else if(cd_valid_i[i]) begin cd_last_q[i] <= (cd_i[i].last & data_available_q[i]); @@ -300,7 +304,7 @@ end always_comb begin cd_ready_o = '0; - if (cd_busy_o) begin + if (fifo_state_q != FIFO_IDLE) begin for (int i = 0; i < NoMstPorts; i = i + 1) begin cd_ready_o[i] = !cd_last_q[i] && data_available_q[i]; end @@ -337,8 +341,6 @@ always_ff @(posedge clk_i or negedge rst_ni) begin end end -logic wb_expected_q; - always_ff @(posedge clk_i or negedge rst_ni) begin if(!rst_ni) begin wb_expected_q <= 1'b0; From d4472aeb1946bcd698076c54409dab5a2c36733b Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 18 Apr 2024 16:26:51 +0200 Subject: [PATCH 009/109] Implement control on ID queues and change collision condition * Stall the decoder W/R requests if the respective queue is full * Collision now happens on equal tags --- src/ccu_ctrl.sv | 86 +++++++++++++++++++++-------------------- src/ccu_ctrl_decoder.sv | 8 ++-- 2 files changed, 49 insertions(+), 45 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 09e39dd..76cb6e1 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -46,6 +46,42 @@ localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth; localparam int unsigned DCacheByteOffset = $clog2(ariane_pkg::DCACHE_LINE_WIDTH/8); localparam int unsigned MstIdxBits = $clog2(NoMstPorts); +logic [SlvAxiIDWidth:0] b_inp_id; +logic [AxiAddrWidth-1:0] b_inp_data; +logic b_inp_req; +logic b_inp_gnt; + +logic [AxiAddrWidth-1:0] b_exists_data; +logic [AxiAddrWidth-1:0] b_exists_mask; +logic b_exists_req; +logic b_exists; +logic b_exists_gnt; + +logic [SlvAxiIDWidth:0] b_oup_id; +logic b_oup_pop; +logic b_oup_req; +logic [AxiAddrWidth-1:0] 
b_oup_data; +logic b_oup_data_valid; +logic b_oup_gnt; + +logic [SlvAxiIDWidth :0] r_inp_id; +logic [AxiAddrWidth-1:0] r_inp_data; +logic r_inp_req; +logic r_inp_gnt; + +logic [AxiAddrWidth-1:0] r_exists_data; +logic [AxiAddrWidth-1:0] r_exists_mask; +logic r_exists_req; +logic r_exists; +logic r_exists_gnt; + +logic [SlvAxiIDWidth:0] r_oup_id; +logic r_oup_pop; +logic r_oup_req; +logic [AxiAddrWidth-1:0] r_oup_data; +logic r_oup_data_valid; +logic r_oup_gnt; + mst_resp_t mu_ccu_resp; mst_req_t mu_ccu_req; @@ -83,7 +119,7 @@ for (genvar i = 0; i < NoMstPorts; i++) begin assign cd_valid[i] = m2s_resp_i[i].cd_valid; end -logic dec_lookup_req, dec_collision; +logic dec_lookup_req, dec_collision, dec_b_queue_full, dec_r_queue_full; ccu_ctrl_decoder #( .DcacheLineWidth (DcacheLineWidth), @@ -127,7 +163,9 @@ ccu_ctrl_decoder #( .first_responder_o (dec_first_responder), .lookup_req_o (dec_lookup_req), - .collision_i (dec_collision) + .collision_i (dec_collision), + .b_queue_full_i (~b_inp_gnt), + .r_queue_full_i (~r_inp_gnt) ); ccu_ctrl_snoop_unit #( @@ -263,42 +301,6 @@ always_comb begin end end -logic [SlvAxiIDWidth:0] b_inp_id; -logic [AxiAddrWidth-1:0] b_inp_data; -logic b_inp_req; -logic b_inp_gnt; - -logic [AxiAddrWidth-1:0] b_exists_data; -logic [AxiAddrWidth-1:0] b_exists_mask; -logic b_exists_req; -logic b_exists; -logic b_exists_gnt; - -logic [SlvAxiIDWidth:0] b_oup_id; -logic b_oup_pop; -logic b_oup_req; -logic [AxiAddrWidth-1:0] b_oup_data; -logic b_oup_data_valid; -logic b_oup_gnt; - -logic [SlvAxiIDWidth :0] r_inp_id; -logic [AxiAddrWidth-1:0] r_inp_data; -logic r_inp_req; -logic r_inp_gnt; - -logic [AxiAddrWidth-1:0] r_exists_data; -logic [AxiAddrWidth-1:0] r_exists_mask; -logic r_exists_req; -logic r_exists; -logic r_exists_gnt; - -logic [SlvAxiIDWidth:0] r_oup_id; -logic r_oup_pop; -logic r_oup_req; -logic [AxiAddrWidth-1:0] r_oup_data; -logic r_oup_data_valid; -logic r_oup_gnt; - // Exists assign dec_collision = (b_exists || r_exists); @@ -306,11 +308,11 
@@ assign dec_collision = (b_exists || r_exists); assign b_exists_data = axi_pkg::aligned_addr(dec_ccu_req_holder.aw.addr,dec_ccu_req_holder.aw.size); -assign b_exists_mask = {ariane_pkg::DCACHE_INDEX_WIDTH{1'b1}} << DCacheByteOffset; +assign b_exists_mask = ~{DCacheByteOffset{1'b1}}; assign b_exists_req = dec_lookup_req; assign r_exists_data = axi_pkg::aligned_addr(dec_ccu_req_holder.ar.addr,dec_ccu_req_holder.ar.size); -assign r_exists_mask = {ariane_pkg::DCACHE_INDEX_WIDTH{1'b1}} << DCacheByteOffset; +assign r_exists_mask = ~{DCacheByteOffset{1'b1}}; assign r_exists_req = dec_lookup_req; // Oup @@ -339,7 +341,7 @@ typedef logic [AxiAddrWidth-1:0] id_queue_data_t; id_queue #( .ID_WIDTH (SlvAxiIDWidth+1), - .CAPACITY (16), + .CAPACITY (4), .FULL_BW (1), .data_t (id_queue_data_t) ) b_id_queue ( @@ -367,7 +369,7 @@ id_queue #( id_queue #( .ID_WIDTH (SlvAxiIDWidth+1), - .CAPACITY (16), + .CAPACITY (4), .FULL_BW (1), .data_t (id_queue_data_t) ) r_id_queue ( diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index 7514f21..e236c0c 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -46,7 +46,9 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; output logic [MstIdxBits-1:0] first_responder_o, output logic lookup_req_o, - input logic collision_i + input logic collision_i, + input logic b_queue_full_i, + input logic r_queue_full_i ); logic [NoMstPorts-1:0] initiator_d, initiator_q; @@ -243,7 +245,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; DECODE_W: begin lookup_req_o = 1'b1; - if (!collision_i) begin + if (!collision_i && !b_queue_full_i) begin state_d = SEND_INVALID_W; slv_aw_ready_o = 1'b1; end @@ -251,7 +253,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; DECODE_R: begin lookup_req_o = 1'b1; - if (!collision_i) begin + if (!collision_i && !r_queue_full_i) begin state_d = send_invalid_r ? 
SEND_INVALID_R : SEND_READ; slv_ar_ready_o = 1'b1; end From 07ac64b2ae46b761a1379a38dfde461f771bd45a Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 18 Apr 2024 16:44:54 +0200 Subject: [PATCH 010/109] Make collision condition parametric --- src/ccu_ctrl.sv | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 76cb6e1..7cba827 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -11,6 +11,7 @@ module ccu_ctrl import ccu_ctrl_pkg::*; parameter int unsigned AxiDataWidth = 0, parameter int unsigned NoMstPorts = 4, parameter int unsigned SlvAxiIDWidth = 0, + parameter bit CollisionOnSetOnly = 0, parameter type mst_aw_chan_t = logic, parameter type w_chan_t = logic, parameter type mst_b_chan_t = logic, @@ -308,11 +309,13 @@ assign dec_collision = (b_exists || r_exists); assign b_exists_data = axi_pkg::aligned_addr(dec_ccu_req_holder.aw.addr,dec_ccu_req_holder.aw.size); -assign b_exists_mask = ~{DCacheByteOffset{1'b1}}; +assign b_exists_mask = CollisionOnSetOnly ? {ariane_pkg::DCACHE_INDEX_WIDTH{1'b1}} << DCacheByteOffset + : ~{DCacheByteOffset{1'b1}}; assign b_exists_req = dec_lookup_req; assign r_exists_data = axi_pkg::aligned_addr(dec_ccu_req_holder.ar.addr,dec_ccu_req_holder.ar.size); -assign r_exists_mask = ~{DCacheByteOffset{1'b1}}; +assign r_exists_mask = CollisionOnSetOnly ? 
{ariane_pkg::DCACHE_INDEX_WIDTH{1'b1}} << DCacheByteOffset + : ~{DCacheByteOffset{1'b1}}; assign r_exists_req = dec_lookup_req; // Oup From b776ad51e0779754f55522f2dc62fe7430eb8460 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 18 Apr 2024 22:05:57 +0200 Subject: [PATCH 011/109] Fix some bugs affecting memory writebacks --- src/ccu_ctrl_memory_unit.sv | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index 1884ef7..530061e 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -49,7 +49,7 @@ mst_resp_t ccu_resp_in; mst_req_t ccu_req_holder_q; logic [MstIdxBits-1:0] first_responder_q, fifo_first_responder_q, fifo_first_responder_d; -logic [NoMstPorts-1:0] data_available_q; +logic [NoMstPorts-1:0] data_available_q, fifo_data_available_q, fifo_data_available_d; logic sample_dec_data; @@ -129,9 +129,8 @@ always_comb begin end SEND_AXI_REQ_WRITE_BACK_R: begin wb_id_d = {first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}; - cd_data_incoming = 1'b1; // send writeback request - aw_valid_out = fifo_empty; + aw_valid_out = !wb_expected_q; aw_out = '0; //default aw_out.addr = ccu_req_holder_q.ar.addr; aw_out.addr[3:0] = 4'b0; // writeback is always full cache line @@ -142,7 +141,8 @@ always_comb begin // WRITEBACK aw_out.domain = 2'b00; aw_out.snoop = 3'b011; - if (ccu_resp_in.aw_ready && fifo_empty) begin + if (ccu_resp_in.aw_ready && !wb_expected_q) begin + cd_data_incoming = 1'b1; if (ccu_req_holder_q.ar.lock) // Blocking behavior for AMO operations // TODO: check if truly needed @@ -171,9 +171,8 @@ always_comb begin end SEND_AXI_REQ_WRITE_BACK_W: begin wb_id_d = {first_responder_q, ccu_req_holder_q.aw.id[SlvAxiIDWidth-1:0]}; - cd_data_incoming = 1'b1; // send writeback request - aw_valid_out = fifo_empty; + aw_valid_out = !wb_expected_q; aw_out = '0; //default aw_out.addr = ccu_req_holder_q.aw.addr; aw_out.addr[3:0] = 
4'b0; // writeback is always full cache line @@ -184,7 +183,8 @@ always_comb begin // WRITEBACK aw_out.domain = 2'b00; aw_out.snoop = 3'b011; - if (ccu_resp_in.aw_ready && fifo_empty) begin + if (ccu_resp_in.aw_ready && !wb_expected_q) begin + cd_data_incoming = 1'b1; if (ccu_req_holder_q.aw.atop[5]) ax_op_d = AMO_WAIT_WB_W; else @@ -224,9 +224,11 @@ always_ff @(posedge clk_i or negedge rst_ni) begin if(!rst_ni) begin fifo_state_q <= FIFO_IDLE; fifo_first_responder_q <= '0; + fifo_data_available_q <= '0; end else begin fifo_state_q <= fifo_state_d; fifo_first_responder_q <= fifo_first_responder_d; + fifo_data_available_q <= fifo_data_available_d; end end @@ -235,6 +237,7 @@ logic [NoMstPorts-1:0] cd_last_q; always_comb begin fifo_state_d = fifo_state_q; fifo_first_responder_d = fifo_first_responder_q; + fifo_data_available_d = fifo_data_available_q; fifo_push = 1'b0; @@ -243,6 +246,7 @@ always_comb begin if (cd_data_incoming) begin fifo_state_d = FIFO_LOWER_HALF; fifo_first_responder_d = first_responder_q; + fifo_data_available_d = data_available_q; end end FIFO_LOWER_HALF: begin @@ -254,18 +258,18 @@ always_comb begin FIFO_UPPER_HALF: begin if(cd_valid_i[fifo_first_responder_q] && cd_ready_o[fifo_first_responder_q]) begin fifo_push = 1'b1; - fifo_state_d = cd_last_q == data_available_q ? FIFO_IDLE : FIFO_WAIT_LAST_CD; + fifo_state_d = cd_last_q == fifo_data_available_q ? FIFO_IDLE : FIFO_WAIT_LAST_CD; end end FIFO_WAIT_LAST_CD: begin - if (cd_last_q == data_available_q) + if (cd_last_q == fifo_data_available_q) fifo_state_d = FIFO_IDLE; end endcase end -assign cd_busy_o = cd_last_q != data_available_q; +assign cd_busy_o = cd_last_q != fifo_data_available_q; assign fifo_flush = 1'b0; assign fifo_data_in = cd_i[fifo_first_responder_q].data; assign fifo_pop = w_state_q inside {W_FROM_FIFO_W, W_FROM_FIFO_R} ? 
ccu_resp_in.w_ready && ccu_req_out.w_valid : '0; @@ -296,7 +300,7 @@ for (genvar i = 0; i < NoMstPorts; i = i + 1) begin end else if(fifo_state_q == FIFO_IDLE) begin cd_last_q[i] <= '0; end else if(cd_valid_i[i]) begin - cd_last_q[i] <= (cd_i[i].last & data_available_q[i]); + cd_last_q[i] <= (cd_i[i].last & fifo_data_available_q[i]); end end end @@ -306,7 +310,7 @@ always_comb begin if (fifo_state_q != FIFO_IDLE) begin for (int i = 0; i < NoMstPorts; i = i + 1) begin - cd_ready_o[i] = !cd_last_q[i] && data_available_q[i]; + cd_ready_o[i] = !cd_last_q[i] && fifo_data_available_q[i]; end if (fifo_full) begin From 82b3093b0900c79517dfc0b7ab224771b799f301 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Sun, 21 Apr 2024 16:19:45 +0200 Subject: [PATCH 012/109] Add proper arbitration on CD channels --- src/ccu_ctrl.sv | 113 +++++++++++++++++++++++++++++------- src/ccu_ctrl_decoder.sv | 8 ++- src/ccu_ctrl_memory_unit.sv | 18 ++++-- src/ccu_ctrl_snoop_unit.sv | 14 ++++- 4 files changed, 124 insertions(+), 29 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 7cba827..ab3d563 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -100,8 +100,11 @@ logic dec_shared, dec_dirty; logic [MstIdxBits-1:0] dec_first_responder; -logic [NoMstPorts-1:0] su_cd_ready, mu_cd_ready; -logic su_cd_busy, mu_cd_busy; +snoop_cd_t [NoMstPorts-1:0] cd; +logic [NoMstPorts-1:0] cd_valid, mu_cd_valid, su_cd_valid; +logic [NoMstPorts-1:0] cd_ready, mu_cd_ready, su_cd_ready; +logic mu_cd_busy, su_cd_busy; +logic mu_cd_done, su_cd_done; mst_r_chan_t su_r; logic su_r_valid, su_r_ready; @@ -112,16 +115,10 @@ logic ccu_ar_ready, ccu_aw_ready; snoop_req_t [NoMstPorts-1:0] dec_snoop_req; -snoop_cd_t [NoMstPorts-1:0] cd; -logic [NoMstPorts-1:0] cd_valid; - -for (genvar i = 0; i < NoMstPorts; i++) begin - assign cd[i] = m2s_resp_i[i].cd; - assign cd_valid[i] = m2s_resp_i[i].cd_valid; -end - logic dec_lookup_req, dec_collision, dec_b_queue_full, dec_r_queue_full; +logic dec_cd_fifo_stall; 
+ ccu_ctrl_decoder #( .DcacheLineWidth (DcacheLineWidth), .AxiDataWidth (AxiDataWidth), @@ -165,6 +162,7 @@ ccu_ctrl_decoder #( .lookup_req_o (dec_lookup_req), .collision_i (dec_collision), + .cd_fifo_stall_i (dec_cd_fifo_stall), .b_queue_full_i (~b_inp_gnt), .r_queue_full_i (~r_inp_gnt) ); @@ -193,9 +191,10 @@ ccu_ctrl_snoop_unit #( .r_valid_o (su_r_valid), .r_ready_i (su_r_ready), .cd_i (cd), - .cd_valid_i (cd_valid), + .cd_valid_i (su_cd_valid), .cd_ready_o (su_cd_ready), .cd_busy_o (su_cd_busy), + .cd_done_o (su_cd_done), .ccu_req_holder_i (dec_ccu_req_holder), .su_ready_o (su_ready), .su_valid_i (su_valid), @@ -234,9 +233,10 @@ ccu_ctrl_memory_unit #( .ccu_resp_i, .cd_i (cd), - .cd_valid_i (cd_valid), + .cd_valid_i (mu_cd_valid), .cd_ready_o (mu_cd_ready), .cd_busy_o (mu_cd_busy), + .cd_done_o (mu_cd_done), .ccu_req_holder_i (dec_ccu_req_holder), .mu_ready_o (mu_ready), @@ -291,15 +291,13 @@ always_comb begin ccu_resp_o.ar_ready = ccu_ar_ready; ccu_resp_o.aw_ready = ccu_aw_ready; +end - // Snoop - for (int unsigned i = 0; i < NoMstPorts; i++) begin - s2m_req_o[i] = '0; - s2m_req_o[i].ac = dec_snoop_req[i].ac; - s2m_req_o[i].ac_valid = dec_snoop_req[i].ac_valid; - s2m_req_o[i].cr_ready = dec_snoop_req[i].cr_ready; - s2m_req_o[i].cd_ready = su_cd_ready[i] || mu_cd_ready[i]; // TODO arb tree - end +// Snoop AC and CR +for (genvar i = 0; i < NoMstPorts; i++) begin + assign s2m_req_o[i].ac = dec_snoop_req[i].ac; + assign s2m_req_o[i].ac_valid = dec_snoop_req[i].ac_valid; + assign s2m_req_o[i].cr_ready = dec_snoop_req[i].cr_ready; end // Exists @@ -397,4 +395,79 @@ id_queue #( .oup_data_valid_o (r_oup_data_valid), .oup_gnt_o (r_oup_gnt) ); + +logic mu_wb_op, su_wb_op; + +logic cd_user_pop, cd_user_push, cd_user_empty, cd_user_full; + +typedef enum logic { MEMORY_UNIT, SNOOP_UNIT } cd_user_t; + +cd_user_t cd_user_in, cd_user_out; + +assign mu_wb_op = mu_op inside {SEND_AXI_REQ_WRITE_BACK_R, SEND_AXI_REQ_WRITE_BACK_W}; +assign su_wb_op = su_op == READ_SNP_DATA; + 
+assign dec_cd_fifo_stall = cd_user_full; + +always_comb begin + cd_user_push = 1'b0; + cd_user_in = '0; + if (mu_ready && mu_valid && mu_wb_op) begin + cd_user_push = 1'b1; + cd_user_in = MEMORY_UNIT; + end else if (su_ready && su_valid && su_wb_op) begin + cd_user_push = 1'b1; + cd_user_in = SNOOP_UNIT; + end +end + +always_comb begin + su_cd_valid = '0; + mu_cd_valid = '0; + cd_ready = '0; + cd_user_pop = 1'b0; + + if (mu_cd_busy || su_cd_busy) begin + case (cd_user_out) + MEMORY_UNIT: begin + mu_cd_valid = cd_valid; + cd_ready = mu_cd_ready; + cd_user_pop = mu_cd_done; + end + SNOOP_UNIT: begin + su_cd_valid = cd_valid; + cd_ready = su_cd_ready; + cd_user_pop = su_cd_done; + end + endcase + end +end + +for (genvar i = 0; i < NoMstPorts; i++) begin + assign cd[i] = m2s_resp_i[i].cd; + assign cd_valid[i] = m2s_resp_i[i].cd_valid; + assign s2m_req_o[i].cd_ready = cd_ready[i]; +end + +logic cd_user_out_temp; +assign cd_user_out = cd_user_t'(cd_user_out_temp); + +fifo_v3 #( + .FALL_THROUGH(0), + .DATA_WIDTH(1), + .DEPTH(4) +) cd_ordering_fifo_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i (1'b0), + .full_o (cd_user_full), + .empty_o (cd_user_empty), + .usage_o (), + .data_i (cd_user_in), + .push_i (cd_user_push), + .data_o (cd_user_out_temp), + .pop_i (cd_user_pop) +); + endmodule diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index e236c0c..67637d9 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -48,7 +48,9 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; output logic lookup_req_o, input logic collision_i, input logic b_queue_full_i, - input logic r_queue_full_i + input logic r_queue_full_i, + + input logic cd_fifo_stall_i ); logic [NoMstPorts-1:0] initiator_d, initiator_q; @@ -245,7 +247,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; DECODE_W: begin lookup_req_o = 1'b1; - if (!collision_i && !b_queue_full_i) begin + if (!collision_i && !b_queue_full_i && !cd_fifo_stall_i) begin state_d = 
SEND_INVALID_W; slv_aw_ready_o = 1'b1; end @@ -253,7 +255,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; DECODE_R: begin lookup_req_o = 1'b1; - if (!collision_i && !r_queue_full_i) begin + if (!collision_i && !r_queue_full_i && !cd_fifo_stall_i) begin state_d = send_invalid_r ? SEND_INVALID_R : SEND_READ; slv_ar_ready_o = 1'b1; end diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index 530061e..f821944 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -33,6 +33,7 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; input logic [NoMstPorts-1:0] cd_valid_i, output logic [NoMstPorts-1:0] cd_ready_o, output logic cd_busy_o, + output logic cd_done_o, input mst_req_t ccu_req_holder_i, output logic mu_ready_o, @@ -241,6 +242,8 @@ always_comb begin fifo_push = 1'b0; + cd_done_o = 1'b0; + case (fifo_state_q) FIFO_IDLE: begin if (cd_data_incoming) begin @@ -258,24 +261,31 @@ always_comb begin FIFO_UPPER_HALF: begin if(cd_valid_i[fifo_first_responder_q] && cd_ready_o[fifo_first_responder_q]) begin fifo_push = 1'b1; - fifo_state_d = cd_last_q == fifo_data_available_q ? FIFO_IDLE : FIFO_WAIT_LAST_CD; + if (cd_last_q == fifo_data_available_q) begin + fifo_state_d = FIFO_IDLE; + cd_done_o = 1'b1; + end else begin + fifo_state_d = FIFO_WAIT_LAST_CD; + end end end FIFO_WAIT_LAST_CD: begin - if (cd_last_q == fifo_data_available_q) + if (cd_last_q == fifo_data_available_q) begin + cd_done_o = 1'b1; fifo_state_d = FIFO_IDLE; + end end endcase end -assign cd_busy_o = cd_last_q != fifo_data_available_q; +assign cd_busy_o = fifo_state_q != FIFO_IDLE; assign fifo_flush = 1'b0; assign fifo_data_in = cd_i[fifo_first_responder_q].data; assign fifo_pop = w_state_q inside {W_FROM_FIFO_W, W_FROM_FIFO_R} ? 
ccu_resp_in.w_ready && ccu_req_out.w_valid : '0; - fifo_v3 #( +fifo_v3 #( .FALL_THROUGH(0), .DATA_WIDTH(AxiDataWidth), .DEPTH(FIFO_DEPTH) diff --git a/src/ccu_ctrl_snoop_unit.sv b/src/ccu_ctrl_snoop_unit.sv index a923471..bfa0e72 100644 --- a/src/ccu_ctrl_snoop_unit.sv +++ b/src/ccu_ctrl_snoop_unit.sv @@ -31,6 +31,7 @@ module ccu_ctrl_snoop_unit import ccu_ctrl_pkg::*; input logic [NoMstPorts-1:0] cd_valid_i, output logic [NoMstPorts-1:0] cd_ready_o, output logic cd_busy_o, + output logic cd_done_o, input mst_req_t ccu_req_holder_i, output logic su_ready_o, @@ -108,6 +109,8 @@ always_comb begin sample_dec_data = 1'b0; + cd_done_o = 1'b0; + case (state_q) IDLE: begin su_ready_o = 1'b1; @@ -177,7 +180,12 @@ always_comb begin if (r_ready_i) begin fifo_pop = 1'b1; - state_d = (cd_last_q == data_available_q) ? IDLE : WAIT_CD_LAST; + if (cd_last_q == data_available_q) begin + state_d = IDLE; + cd_done_o = 1'b1; + end else begin + state_d = WAIT_CD_LAST; + end end end end @@ -193,8 +201,10 @@ always_comb begin end WAIT_CD_LAST: begin - if (cd_last_q == data_available_q) + if (cd_last_q == data_available_q) begin state_d = IDLE; + cd_done_o = 1'b1; + end end endcase end From 7c39d21f001c5dad8c6c448c4da919a4cf3f1e46 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Sun, 21 Apr 2024 17:46:50 +0200 Subject: [PATCH 013/109] Minor embellishments of the code --- src/ccu_ctrl.sv | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index ab3d563..ea80247 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -52,8 +52,8 @@ logic [AxiAddrWidth-1:0] b_inp_data; logic b_inp_req; logic b_inp_gnt; -logic [AxiAddrWidth-1:0] b_exists_data; -logic [AxiAddrWidth-1:0] b_exists_mask; +logic [AxiAddrWidth-1:0] b_exists_data; +logic [AxiAddrWidth-1:0] b_exists_mask; logic b_exists_req; logic b_exists; logic b_exists_gnt; @@ -61,7 +61,7 @@ logic b_exists_gnt; logic [SlvAxiIDWidth:0] b_oup_id; logic b_oup_pop; logic b_oup_req; 
-logic [AxiAddrWidth-1:0] b_oup_data; +logic [AxiAddrWidth-1:0] b_oup_data; logic b_oup_data_valid; logic b_oup_gnt; @@ -70,8 +70,8 @@ logic [AxiAddrWidth-1:0] r_inp_data; logic r_inp_req; logic r_inp_gnt; -logic [AxiAddrWidth-1:0] r_exists_data; -logic [AxiAddrWidth-1:0] r_exists_mask; +logic [AxiAddrWidth-1:0] r_exists_data; +logic [AxiAddrWidth-1:0] r_exists_mask; logic r_exists_req; logic r_exists; logic r_exists_gnt; @@ -79,7 +79,7 @@ logic r_exists_gnt; logic [SlvAxiIDWidth:0] r_oup_id; logic r_oup_pop; logic r_oup_req; -logic [AxiAddrWidth-1:0] r_oup_data; +logic [AxiAddrWidth-1:0] r_oup_data; logic r_oup_data_valid; logic r_oup_gnt; From 506a41bb005a272eaca257e586bc0d5a904464d1 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Sun, 21 Apr 2024 17:48:30 +0200 Subject: [PATCH 014/109] First changes to harmonize the management of the CD channel --- src/ccu_ctrl.sv | 65 ++++++++++++++------------- src/ccu_ctrl_memory_unit.sv | 90 +++++++++++++------------------------ src/ccu_ctrl_snoop_unit.sv | 44 +++++++----------- 3 files changed, 80 insertions(+), 119 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index ea80247..cf96a9c 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -98,18 +98,17 @@ mst_req_t dec_ccu_req_holder; logic dec_shared, dec_dirty; -logic [MstIdxBits-1:0] dec_first_responder; +logic [MstIdxBits-1:0] dec_first_responder, cd_first_responder_in, cd_first_responder_out; snoop_cd_t [NoMstPorts-1:0] cd; logic [NoMstPorts-1:0] cd_valid, mu_cd_valid, su_cd_valid; logic [NoMstPorts-1:0] cd_ready, mu_cd_ready, su_cd_ready; -logic mu_cd_busy, su_cd_busy; logic mu_cd_done, su_cd_done; mst_r_chan_t su_r; logic su_r_valid, su_r_ready; -logic [NoMstPorts-1:0] data_available; +logic [NoMstPorts-1:0] data_available, cd_data_available_in, cd_data_available_out; logic ccu_ar_ready, ccu_aw_ready; @@ -187,22 +186,21 @@ ccu_ctrl_snoop_unit #( ) ccu_ctrl_snoop_unit_i ( .clk_i, .rst_ni, - .r_o (su_r), - .r_valid_o (su_r_valid), - .r_ready_i 
(su_r_ready), - .cd_i (cd), - .cd_valid_i (su_cd_valid), - .cd_ready_o (su_cd_ready), - .cd_busy_o (su_cd_busy), - .cd_done_o (su_cd_done), - .ccu_req_holder_i (dec_ccu_req_holder), - .su_ready_o (su_ready), - .su_valid_i (su_valid), - .su_op_i (su_op), - .shared_i (dec_shared), - .dirty_i (dec_dirty), - .data_available_i (data_available), - .first_responder_i (dec_first_responder) + .r_o (su_r), + .r_valid_o (su_r_valid), + .r_ready_i (su_r_ready), + .cd_i (cd), + .cd_valid_i (su_cd_valid), + .cd_ready_o (su_cd_ready), + .cd_done_o (su_cd_done), + .cd_data_available_i (cd_data_available_out), + .cd_first_responder_i (cd_first_responder_out), + .ccu_req_holder_i (dec_ccu_req_holder), + .su_ready_o (su_ready), + .su_valid_i (su_valid), + .su_op_i (su_op), + .shared_i (dec_shared), + .dirty_i (dec_dirty) ); ccu_ctrl_memory_unit #( @@ -232,17 +230,17 @@ ccu_ctrl_memory_unit #( .ccu_req_o, .ccu_resp_i, - .cd_i (cd), - .cd_valid_i (mu_cd_valid), - .cd_ready_o (mu_cd_ready), - .cd_busy_o (mu_cd_busy), - .cd_done_o (mu_cd_done), + .cd_i (cd), + .cd_valid_i (mu_cd_valid), + .cd_ready_o (mu_cd_ready), + .cd_done_o (mu_cd_done), + .cd_first_responder_i (cd_first_responder_out), + .cd_data_available_i (cd_data_available_out), .ccu_req_holder_i (dec_ccu_req_holder), .mu_ready_o (mu_ready), .mu_valid_i (mu_valid), .mu_op_i (mu_op), - .data_available_i (data_available), .first_responder_i (dec_first_responder) ); @@ -396,6 +394,8 @@ id_queue #( .oup_gnt_o (r_oup_gnt) ); +// CD arbitration + logic mu_wb_op, su_wb_op; logic cd_user_pop, cd_user_push, cd_user_empty, cd_user_full; @@ -427,7 +427,7 @@ always_comb begin cd_ready = '0; cd_user_pop = 1'b0; - if (mu_cd_busy || su_cd_busy) begin + if (!cd_user_empty) begin case (cd_user_out) MEMORY_UNIT: begin mu_cd_valid = cd_valid; @@ -449,12 +449,15 @@ for (genvar i = 0; i < NoMstPorts; i++) begin assign s2m_req_o[i].cd_ready = cd_ready[i]; end -logic cd_user_out_temp; -assign cd_user_out = cd_user_t'(cd_user_out_temp); +logic 
cd_user_out_temp, cd_user_in_temp; +assign cd_user_in_temp = logic'(cd_user_in); +assign cd_user_out = cd_user_t'(cd_user_out_temp); +assign cd_first_responder_in = dec_first_responder; +assign cd_data_available_in = data_available; fifo_v3 #( .FALL_THROUGH(0), - .DATA_WIDTH(1), + .DATA_WIDTH(1 + 2 * NoMstPorts), .DEPTH(4) ) cd_ordering_fifo_i ( .clk_i (clk_i), @@ -464,9 +467,9 @@ fifo_v3 #( .full_o (cd_user_full), .empty_o (cd_user_empty), .usage_o (), - .data_i (cd_user_in), + .data_i ({cd_user_in_temp, cd_first_responder_in, cd_data_available_in}), .push_i (cd_user_push), - .data_o (cd_user_out_temp), + .data_o ({cd_user_out_temp, cd_first_responder_out, cd_data_available_out}), .pop_i (cd_user_pop) ); diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index f821944..d8aec50 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -32,14 +32,15 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; input snoop_cd_t [NoMstPorts-1:0] cd_i, input logic [NoMstPorts-1:0] cd_valid_i, output logic [NoMstPorts-1:0] cd_ready_o, - output logic cd_busy_o, output logic cd_done_o, + input logic [NoMstPorts-1:0] cd_data_available_i, + input logic [MstIdxBits-1:0] cd_first_responder_i, + input mst_req_t ccu_req_holder_i, output logic mu_ready_o, input logic mu_valid_i, input mu_op_e mu_op_i, - input logic [NoMstPorts-1:0] data_available_i, input logic [MstIdxBits-1:0] first_responder_i ); @@ -49,10 +50,7 @@ mst_req_t ccu_req_out; mst_resp_t ccu_resp_in; mst_req_t ccu_req_holder_q; -logic [MstIdxBits-1:0] first_responder_q, fifo_first_responder_q, fifo_first_responder_d; -logic [NoMstPorts-1:0] data_available_q, fifo_data_available_q, fifo_data_available_d; - -logic sample_dec_data; +logic [MstIdxBits-1:0] first_responder_q; logic fifo_push, fifo_flush, fifo_pop, fifo_full, fifo_empty; @@ -60,11 +58,9 @@ always_ff @(posedge clk_i , negedge rst_ni) begin if(!rst_ni) begin ccu_req_holder_q <= '0; first_responder_q <= '0; - data_available_q 
<= '0; - end else if (sample_dec_data) begin + end else if (mu_ready_o && mu_valid_i) begin ccu_req_holder_q <= ccu_req_holder_i; first_responder_q <= first_responder_i; - data_available_q <= data_available_i; end end @@ -86,7 +82,7 @@ mst_aw_chan_t aw_out; logic ar_valid_out, aw_valid_out; -logic cd_data_incoming; +logic wb_expected_en; logic w_last_d, w_last_q; @@ -99,14 +95,12 @@ always_comb begin ax_busy_d = ax_busy_q; ax_op_d = ax_op_q; - sample_dec_data = 1'b0; - ar_out = '0; aw_out = '0; ar_valid_out = 1'b0; aw_valid_out = 1'b0; - cd_data_incoming = 1'b0; + wb_expected_en = 1'b0; wb_id_d = wb_id_q; @@ -114,7 +108,6 @@ always_comb begin 1'b0: begin mu_ready_o = 1'b1; if (mu_valid_i) begin - sample_dec_data = 1'b1; ax_op_d = mu_op_i; ax_busy_d = 1'b1; end @@ -143,7 +136,7 @@ always_comb begin aw_out.domain = 2'b00; aw_out.snoop = 3'b011; if (ccu_resp_in.aw_ready && !wb_expected_q) begin - cd_data_incoming = 1'b1; + wb_expected_en = 1'b1; if (ccu_req_holder_q.ar.lock) // Blocking behavior for AMO operations // TODO: check if truly needed @@ -185,7 +178,7 @@ always_comb begin aw_out.domain = 2'b00; aw_out.snoop = 3'b011; if (ccu_resp_in.aw_ready && !wb_expected_q) begin - cd_data_incoming = 1'b1; + wb_expected_en = 1'b1; if (ccu_req_holder_q.aw.atop[5]) ax_op_d = AMO_WAIT_WB_W; else @@ -219,58 +212,35 @@ w_state_t w_state_q, w_state_d; logic [AxiDataWidth-1:0] fifo_data_in, fifo_data_out; logic [$clog2(DcacheLineWords)-1:0] fifo_usage; -enum { FIFO_IDLE, FIFO_LOWER_HALF, FIFO_UPPER_HALF, FIFO_WAIT_LAST_CD } fifo_state_q, fifo_state_d; +enum { FIFO_IDLE, FIFO_BUSY } fifo_state_q, fifo_state_d; always_ff @(posedge clk_i or negedge rst_ni) begin if(!rst_ni) begin fifo_state_q <= FIFO_IDLE; - fifo_first_responder_q <= '0; - fifo_data_available_q <= '0; end else begin fifo_state_q <= fifo_state_d; - fifo_first_responder_q <= fifo_first_responder_d; - fifo_data_available_q <= fifo_data_available_d; end end logic [NoMstPorts-1:0] cd_last_q; +logic wb_op; + +assign 
wb_op = mu_op_i inside {SEND_AXI_REQ_WRITE_BACK_R, SEND_AXI_REQ_WRITE_BACK_W}; + always_comb begin fifo_state_d = fifo_state_q; - fifo_first_responder_d = fifo_first_responder_q; - fifo_data_available_d = fifo_data_available_q; - - fifo_push = 1'b0; cd_done_o = 1'b0; case (fifo_state_q) FIFO_IDLE: begin - if (cd_data_incoming) begin - fifo_state_d = FIFO_LOWER_HALF; - fifo_first_responder_d = first_responder_q; - fifo_data_available_d = data_available_q; - end - end - FIFO_LOWER_HALF: begin - if(cd_valid_i[fifo_first_responder_q] && cd_ready_o[fifo_first_responder_q]) begin - fifo_push = 1'b1; - fifo_state_d = FIFO_UPPER_HALF; + if (mu_valid_i && wb_op) begin + fifo_state_d = FIFO_BUSY; end end - FIFO_UPPER_HALF: begin - if(cd_valid_i[fifo_first_responder_q] && cd_ready_o[fifo_first_responder_q]) begin - fifo_push = 1'b1; - if (cd_last_q == fifo_data_available_q) begin - fifo_state_d = FIFO_IDLE; - cd_done_o = 1'b1; - end else begin - fifo_state_d = FIFO_WAIT_LAST_CD; - end - end - end - FIFO_WAIT_LAST_CD: begin - if (cd_last_q == fifo_data_available_q) begin + FIFO_BUSY: begin + if (cd_last_q == cd_data_available_i) begin cd_done_o = 1'b1; fifo_state_d = FIFO_IDLE; end @@ -279,9 +249,9 @@ always_comb begin end -assign cd_busy_o = fifo_state_q != FIFO_IDLE; +assign fifo_push = cd_valid_i[cd_first_responder_i] && cd_ready_o[cd_first_responder_i]; assign fifo_flush = 1'b0; -assign fifo_data_in = cd_i[fifo_first_responder_q].data; +assign fifo_data_in = cd_i[cd_first_responder_i].data; assign fifo_pop = w_state_q inside {W_FROM_FIFO_W, W_FROM_FIFO_R} ? 
ccu_resp_in.w_ready && ccu_req_out.w_valid : '0; @@ -303,14 +273,16 @@ fifo_v3 #( .pop_i (fifo_pop) ); +// TODO: unify cd_last handling + for (genvar i = 0; i < NoMstPorts; i = i + 1) begin always_ff @ (posedge clk_i, negedge rst_ni) begin if(!rst_ni) begin cd_last_q[i] <= '0; - end else if(fifo_state_q == FIFO_IDLE) begin + end else if(cd_done_o) begin cd_last_q[i] <= '0; end else if(cd_valid_i[i]) begin - cd_last_q[i] <= (cd_i[i].last & fifo_data_available_q[i]); + cd_last_q[i] <= (cd_i[i].last & cd_data_available_i[i]); end end end @@ -318,14 +290,12 @@ end always_comb begin cd_ready_o = '0; - if (fifo_state_q != FIFO_IDLE) begin - for (int i = 0; i < NoMstPorts; i = i + 1) begin - cd_ready_o[i] = !cd_last_q[i] && fifo_data_available_q[i]; - end + for (int i = 0; i < NoMstPorts; i = i + 1) begin + cd_ready_o[i] = !cd_last_q[i] && cd_data_available_i[i]; + end - if (fifo_full) begin - cd_ready_o[fifo_first_responder_q] = 1'b0; - end + if (fifo_full) begin + cd_ready_o[cd_first_responder_i] = 1'b0; end end @@ -364,7 +334,7 @@ always_ff @(posedge clk_i or negedge rst_ni) begin ccu_resp_in.b.id == wb_id_q) begin wb_expected_q <= 1'b0; wb_id_q <= '0; - end else if(cd_data_incoming) begin + end else if(wb_expected_en) begin wb_expected_q <= 1'b1; wb_id_q <= wb_id_d; end diff --git a/src/ccu_ctrl_snoop_unit.sv b/src/ccu_ctrl_snoop_unit.sv index bfa0e72..ca8eed0 100644 --- a/src/ccu_ctrl_snoop_unit.sv +++ b/src/ccu_ctrl_snoop_unit.sv @@ -30,17 +30,16 @@ module ccu_ctrl_snoop_unit import ccu_ctrl_pkg::*; input snoop_cd_t [NoMstPorts-1:0] cd_i, input logic [NoMstPorts-1:0] cd_valid_i, output logic [NoMstPorts-1:0] cd_ready_o, - output logic cd_busy_o, output logic cd_done_o, + input logic [NoMstPorts-1:0] cd_data_available_i, + input logic [MstIdxBits-1:0] cd_first_responder_i, input mst_req_t ccu_req_holder_i, output logic su_ready_o, input logic su_valid_i, input su_op_e su_op_i, input logic shared_i, - input logic dirty_i, - input logic [NoMstPorts-1:0] 
data_available_i, - input logic [MstIdxBits-1:0] first_responder_i + input logic dirty_i ); localparam FIFO_DEPTH = 2; @@ -61,22 +60,16 @@ logic sample_dec_data; mst_req_t ccu_req_holder_q; logic shared_q; logic dirty_q; -logic [MstIdxBits-1:0] first_responder_q; -logic [NoMstPorts-1:0] data_available_q; always_ff @(posedge clk_i , negedge rst_ni) begin if(!rst_ni) begin ccu_req_holder_q <= '0; shared_q <= '0; dirty_q <= '0; - first_responder_q <= '0; - data_available_q <= '0; end else if(sample_dec_data) begin ccu_req_holder_q <= ccu_req_holder_i; shared_q <= shared_i; dirty_q <= dirty_i; - first_responder_q <= first_responder_i; - data_available_q <= data_available_i; end end @@ -180,7 +173,7 @@ always_comb begin if (r_ready_i) begin fifo_pop = 1'b1; - if (cd_last_q == data_available_q) begin + if (cd_last_q == cd_data_available_i) begin state_d = IDLE; cd_done_o = 1'b1; end else begin @@ -201,7 +194,7 @@ always_comb begin end WAIT_CD_LAST: begin - if (cd_last_q == data_available_q) begin + if (cd_last_q == cd_data_available_i) begin state_d = IDLE; cd_done_o = 1'b1; end @@ -209,11 +202,9 @@ always_comb begin endcase end -assign cd_busy_o = !(state_q inside {IDLE, WAIT_R_READY}); - -assign fifo_push = cd_busy_o && cd_valid_i[first_responder_q] && cd_ready_o[first_responder_q]; -assign fifo_flush = !cd_busy_o; -assign fifo_data_in = cd_i[first_responder_q].data; +assign fifo_push = cd_valid_i[cd_first_responder_i] && cd_ready_o[cd_first_responder_i]; +assign fifo_flush = 1'b0; +assign fifo_data_in = cd_i[cd_first_responder_i].data; fifo_v3 #( @@ -234,15 +225,15 @@ assign fifo_data_in = cd_i[first_responder_q].data; .pop_i (fifo_pop) ); - +// TODO: unify cd_last handling for (genvar i = 0; i < NoMstPorts; i = i + 1) begin always_ff @ (posedge clk_i, negedge rst_ni) begin if(!rst_ni) begin cd_last_q[i] <= '0; - end else if(!cd_busy_o) begin + end else if(cd_done_o) begin cd_last_q[i] <= '0; end else if(cd_valid_i[i]) begin - cd_last_q[i] <= (cd_i[i].last & 
data_available_q[i]); + cd_last_q[i] <= (cd_i[i].last & cd_data_available_i[i]); end end end @@ -250,16 +241,13 @@ end always_comb begin cd_ready_o = '0; - if (cd_busy_o) begin - for (int i = 0; i < NoMstPorts; i = i + 1) begin - cd_ready_o[i] = !cd_last_q[i] && data_available_q[i]; - end - - if (fifo_full) begin - cd_ready_o[first_responder_q] = 1'b0; - end + for (int i = 0; i < NoMstPorts; i = i + 1) begin + cd_ready_o[i] = !cd_last_q[i] && cd_data_available_i[i]; end + if (fifo_full) begin + cd_ready_o[cd_first_responder_i] = 1'b0; + end end endmodule From b3b0a75336a3ec57bddf466ccf6bc009e4c1624f Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Sun, 21 Apr 2024 19:54:16 +0200 Subject: [PATCH 015/109] Rework CD channel handling --- src/ccu_ctrl.sv | 85 +++++++++++++++++++++++-------------- src/ccu_ctrl_memory_unit.sv | 83 ++++-------------------------------- src/ccu_ctrl_snoop_unit.sv | 56 +++--------------------- 3 files changed, 69 insertions(+), 155 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index cf96a9c..f1e4e30 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -101,20 +101,22 @@ logic dec_shared, dec_dirty; logic [MstIdxBits-1:0] dec_first_responder, cd_first_responder_in, cd_first_responder_out; snoop_cd_t [NoMstPorts-1:0] cd; -logic [NoMstPorts-1:0] cd_valid, mu_cd_valid, su_cd_valid; -logic [NoMstPorts-1:0] cd_ready, mu_cd_ready, su_cd_ready; -logic mu_cd_done, su_cd_done; +snoop_cd_t cd_first_responder; +logic cd_handshake, mu_cd_handshake, su_cd_handshake; +logic [NoMstPorts-1:0] cd_valid; +logic [NoMstPorts-1:0] cd_ready; +logic [NoMstPorts-1:0] cd_data_available_in, cd_data_available_out; +logic [NoMstPorts-1:0] cd_last_q; +logic cd_fifo_full, mu_cd_fifo_full, su_cd_fifo_full; mst_r_chan_t su_r; logic su_r_valid, su_r_ready; -logic [NoMstPorts-1:0] data_available, cd_data_available_in, cd_data_available_out; - logic ccu_ar_ready, ccu_aw_ready; snoop_req_t [NoMstPorts-1:0] dec_snoop_req; -logic dec_lookup_req, dec_collision, 
dec_b_queue_full, dec_r_queue_full; +logic dec_lookup_req, dec_collision; logic dec_cd_fifo_stall; @@ -156,7 +158,7 @@ ccu_ctrl_decoder #( .mu_op_o (mu_op), .shared_o (dec_shared), .dirty_o (dec_dirty), - .data_available_o (data_available), + .data_available_o (cd_data_available_in), .first_responder_o (dec_first_responder), .lookup_req_o (dec_lookup_req), @@ -186,19 +188,21 @@ ccu_ctrl_snoop_unit #( ) ccu_ctrl_snoop_unit_i ( .clk_i, .rst_ni, + .r_o (su_r), .r_valid_o (su_r_valid), .r_ready_i (su_r_ready), - .cd_i (cd), - .cd_valid_i (su_cd_valid), - .cd_ready_o (su_cd_ready), - .cd_done_o (su_cd_done), - .cd_data_available_i (cd_data_available_out), - .cd_first_responder_i (cd_first_responder_out), + + .cd_i (cd_first_responder), + .cd_handshake_i (su_cd_handshake), + .cd_fifo_full_o (su_cd_fifo_full), + .ccu_req_holder_i (dec_ccu_req_holder), + .su_ready_o (su_ready), .su_valid_i (su_valid), .su_op_i (su_op), + .shared_i (dec_shared), .dirty_i (dec_dirty) ); @@ -230,12 +234,9 @@ ccu_ctrl_memory_unit #( .ccu_req_o, .ccu_resp_i, - .cd_i (cd), - .cd_valid_i (mu_cd_valid), - .cd_ready_o (mu_cd_ready), - .cd_done_o (mu_cd_done), - .cd_first_responder_i (cd_first_responder_out), - .cd_data_available_i (cd_data_available_out), + .cd_i (cd_first_responder), + .cd_handshake_i (mu_cd_handshake), + .cd_fifo_full_o (mu_cd_fifo_full), .ccu_req_holder_i (dec_ccu_req_holder), .mu_ready_o (mu_ready), @@ -404,6 +405,8 @@ typedef enum logic { MEMORY_UNIT, SNOOP_UNIT } cd_user_t; cd_user_t cd_user_in, cd_user_out; +logic cd_done; + assign mu_wb_op = mu_op inside {SEND_AXI_REQ_WRITE_BACK_R, SEND_AXI_REQ_WRITE_BACK_W}; assign su_wb_op = su_op == READ_SNP_DATA; @@ -422,27 +425,31 @@ always_comb begin end always_comb begin - su_cd_valid = '0; - mu_cd_valid = '0; - cd_ready = '0; - cd_user_pop = 1'b0; + su_cd_handshake = '0; + mu_cd_handshake = '0; + cd_fifo_full = '0; + cd_done = '0; if (!cd_user_empty) begin + cd_done = cd_last_q == cd_data_available_out; case (cd_user_out) 
MEMORY_UNIT: begin - mu_cd_valid = cd_valid; - cd_ready = mu_cd_ready; - cd_user_pop = mu_cd_done; + mu_cd_handshake = cd_handshake; + cd_fifo_full = mu_cd_fifo_full; end SNOOP_UNIT: begin - su_cd_valid = cd_valid; - cd_ready = su_cd_ready; - cd_user_pop = su_cd_done; + su_cd_handshake = cd_handshake; + cd_fifo_full = su_cd_fifo_full; end endcase end end +for (genvar i = 0; i < NoMstPorts; i++) begin + assign cd_ready[i] = (cd_first_responder_out == i && cd_fifo_full) ? '0 : + !cd_user_empty && !cd_last_q[i] && cd_data_available_out[i]; +end + for (genvar i = 0; i < NoMstPorts; i++) begin assign cd[i] = m2s_resp_i[i].cd; assign cd_valid[i] = m2s_resp_i[i].cd_valid; @@ -453,10 +460,11 @@ logic cd_user_out_temp, cd_user_in_temp; assign cd_user_in_temp = logic'(cd_user_in); assign cd_user_out = cd_user_t'(cd_user_out_temp); assign cd_first_responder_in = dec_first_responder; -assign cd_data_available_in = data_available; + +assign cd_user_pop = cd_done; fifo_v3 #( - .FALL_THROUGH(0), + .FALL_THROUGH(1), .DATA_WIDTH(1 + 2 * NoMstPorts), .DEPTH(4) ) cd_ordering_fifo_i ( @@ -473,4 +481,19 @@ fifo_v3 #( .pop_i (cd_user_pop) ); +for (genvar i = 0; i < NoMstPorts; i = i + 1) begin + always_ff @ (posedge clk_i, negedge rst_ni) begin + if(!rst_ni) begin + cd_last_q[i] <= '0; + end else if(cd_done) begin + cd_last_q[i] <= '0; + end else if(cd_valid[i]) begin + cd_last_q[i] <= (cd[i].last & cd_data_available_out[i]); + end + end +end + +assign cd_first_responder = cd[cd_first_responder_out]; +assign cd_handshake = cd_valid[cd_first_responder_out] && cd_ready[cd_first_responder_out]; + endmodule diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index d8aec50..30ecb54 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -29,12 +29,9 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; output mst_req_t ccu_req_o, input mst_resp_t ccu_resp_i, - input snoop_cd_t [NoMstPorts-1:0] cd_i, - input logic [NoMstPorts-1:0] cd_valid_i, - output 
logic [NoMstPorts-1:0] cd_ready_o, - output logic cd_done_o, - input logic [NoMstPorts-1:0] cd_data_available_i, - input logic [MstIdxBits-1:0] cd_first_responder_i, + input snoop_cd_t cd_i, + input logic cd_handshake_i, + output logic cd_fifo_full_o, input mst_req_t ccu_req_holder_i, @@ -212,48 +209,11 @@ w_state_t w_state_q, w_state_d; logic [AxiDataWidth-1:0] fifo_data_in, fifo_data_out; logic [$clog2(DcacheLineWords)-1:0] fifo_usage; -enum { FIFO_IDLE, FIFO_BUSY } fifo_state_q, fifo_state_d; - -always_ff @(posedge clk_i or negedge rst_ni) begin - if(!rst_ni) begin - fifo_state_q <= FIFO_IDLE; - end else begin - fifo_state_q <= fifo_state_d; - end -end - -logic [NoMstPorts-1:0] cd_last_q; - -logic wb_op; - -assign wb_op = mu_op_i inside {SEND_AXI_REQ_WRITE_BACK_R, SEND_AXI_REQ_WRITE_BACK_W}; - -always_comb begin - fifo_state_d = fifo_state_q; - - cd_done_o = 1'b0; - - case (fifo_state_q) - FIFO_IDLE: begin - if (mu_valid_i && wb_op) begin - fifo_state_d = FIFO_BUSY; - end - end - FIFO_BUSY: begin - if (cd_last_q == cd_data_available_i) begin - cd_done_o = 1'b1; - fifo_state_d = FIFO_IDLE; - end - end - endcase - -end - -assign fifo_push = cd_valid_i[cd_first_responder_i] && cd_ready_o[cd_first_responder_i]; -assign fifo_flush = 1'b0; -assign fifo_data_in = cd_i[cd_first_responder_i].data; -assign fifo_pop = w_state_q inside {W_FROM_FIFO_W, W_FROM_FIFO_R} ? ccu_resp_in.w_ready && ccu_req_out.w_valid : '0; - +assign fifo_push = cd_handshake_i; +assign fifo_flush = 1'b0; +assign fifo_data_in = cd_i.data; +assign fifo_pop = w_state_q inside {W_FROM_FIFO_W, W_FROM_FIFO_R} ? 
ccu_resp_in.w_ready && ccu_req_out.w_valid : '0; +assign cd_fifo_full_o = fifo_full; fifo_v3 #( .FALL_THROUGH(0), @@ -273,33 +233,6 @@ fifo_v3 #( .pop_i (fifo_pop) ); -// TODO: unify cd_last handling - -for (genvar i = 0; i < NoMstPorts; i = i + 1) begin - always_ff @ (posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - cd_last_q[i] <= '0; - end else if(cd_done_o) begin - cd_last_q[i] <= '0; - end else if(cd_valid_i[i]) begin - cd_last_q[i] <= (cd_i[i].last & cd_data_available_i[i]); - end - end -end - -always_comb begin - cd_ready_o = '0; - - for (int i = 0; i < NoMstPorts; i = i + 1) begin - cd_ready_o[i] = !cd_last_q[i] && cd_data_available_i[i]; - end - - if (fifo_full) begin - cd_ready_o[cd_first_responder_i] = 1'b0; - end - -end - // AR assign ccu_req_out.ar = ar_out; assign ccu_req_out.ar_valid = ar_valid_out; diff --git a/src/ccu_ctrl_snoop_unit.sv b/src/ccu_ctrl_snoop_unit.sv index ca8eed0..0d6ea6c 100644 --- a/src/ccu_ctrl_snoop_unit.sv +++ b/src/ccu_ctrl_snoop_unit.sv @@ -27,12 +27,9 @@ module ccu_ctrl_snoop_unit import ccu_ctrl_pkg::*; output logic r_valid_o, input logic r_ready_i, - input snoop_cd_t [NoMstPorts-1:0] cd_i, - input logic [NoMstPorts-1:0] cd_valid_i, - output logic [NoMstPorts-1:0] cd_ready_o, - output logic cd_done_o, - input logic [NoMstPorts-1:0] cd_data_available_i, - input logic [MstIdxBits-1:0] cd_first_responder_i, + input snoop_cd_t cd_i, + input logic cd_handshake_i, + output logic cd_fifo_full_o, input mst_req_t ccu_req_holder_i, output logic su_ready_o, @@ -87,7 +84,7 @@ assign ar_addr_offset = ccu_req_holder_q.ar.addr[3]; logic fifo_full, fifo_empty, fifo_push, fifo_pop; -logic [NoMstPorts-1:0] cd_last_q; +assign cd_fifo_full_o = fifo_full; always_comb begin @@ -102,8 +99,6 @@ always_comb begin sample_dec_data = 1'b0; - cd_done_o = 1'b0; - case (state_q) IDLE: begin su_ready_o = 1'b1; @@ -173,12 +168,7 @@ always_comb begin if (r_ready_i) begin fifo_pop = 1'b1; - if (cd_last_q == cd_data_available_i) begin - state_d = 
IDLE; - cd_done_o = 1'b1; - end else begin - state_d = WAIT_CD_LAST; - end + state_d = IDLE; end end end @@ -192,19 +182,12 @@ always_comb begin if (r_ready_i) state_d = IDLE; end - - WAIT_CD_LAST: begin - if (cd_last_q == cd_data_available_i) begin - state_d = IDLE; - cd_done_o = 1'b1; - end - end endcase end -assign fifo_push = cd_valid_i[cd_first_responder_i] && cd_ready_o[cd_first_responder_i]; +assign fifo_push = cd_handshake_i; assign fifo_flush = 1'b0; -assign fifo_data_in = cd_i[cd_first_responder_i].data; +assign fifo_data_in = cd_i.data; fifo_v3 #( @@ -225,29 +208,4 @@ assign fifo_data_in = cd_i[cd_first_responder_i].data; .pop_i (fifo_pop) ); -// TODO: unify cd_last handling -for (genvar i = 0; i < NoMstPorts; i = i + 1) begin - always_ff @ (posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - cd_last_q[i] <= '0; - end else if(cd_done_o) begin - cd_last_q[i] <= '0; - end else if(cd_valid_i[i]) begin - cd_last_q[i] <= (cd_i[i].last & cd_data_available_i[i]); - end - end -end - -always_comb begin - cd_ready_o = '0; - - for (int i = 0; i < NoMstPorts; i = i + 1) begin - cd_ready_o[i] = !cd_last_q[i] && cd_data_available_i[i]; - end - - if (fifo_full) begin - cd_ready_o[cd_first_responder_i] = 1'b0; - end -end - endmodule From c015131c05b1eb90c21fff46c61503dfa08e226f Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Sun, 21 Apr 2024 19:57:59 +0200 Subject: [PATCH 016/109] Fix spurious assign to enum --- src/ccu_ctrl.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index f1e4e30..7beb6cf 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -414,7 +414,7 @@ assign dec_cd_fifo_stall = cd_user_full; always_comb begin cd_user_push = 1'b0; - cd_user_in = '0; + cd_user_in = MEMORY_UNIT; if (mu_ready && mu_valid && mu_wb_op) begin cd_user_push = 1'b1; cd_user_in = MEMORY_UNIT; From 81ad84dc8f97a661c33301884c64547b6187df30 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 22 Apr 2024 17:07:25 
+0200 Subject: [PATCH 017/109] Fix Bender.yml --- Bender.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Bender.yml b/Bender.yml index 2fcd1ce..b881111 100644 --- a/Bender.yml +++ b/Bender.yml @@ -23,7 +23,7 @@ sources: # Level 2 - src/ace_trs_dec.sv - src/ccu_ctrl_decoder.sv - - src/ccu_ctrl_memory_units.sv + - src/ccu_ctrl_memory_unit.sv - src/ccu_ctrl_snoop_unit.sv # Level 3 - src/ccu_ctrl.sv From 340828cece77c8b424452d7a620b57ec6ec54cda Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 22 Apr 2024 17:32:03 +0200 Subject: [PATCH 018/109] Pass DCacheIndexWidth parameter through Cfg --- src/ace_ccu_top.sv | 1 + src/ace_pkg.sv | 1 + src/ccu_ctrl.sv | 7 ++++--- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/ace_ccu_top.sv b/src/ace_ccu_top.sv index 5afc120..a413bf2 100644 --- a/src/ace_ccu_top.sv +++ b/src/ace_ccu_top.sv @@ -219,6 +219,7 @@ axi_mux #( ccu_ctrl #( .DcacheLineWidth ( Cfg.DcacheLineWidth ), + .DCacheIndexWidth( Cfg.DCacheIndexWidth ), .AxiDataWidth ( Cfg.AxiDataWidth ), .NoMstPorts ( Cfg.NoSlvPorts ), .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts ), // ID width of the slave ports diff --git a/src/ace_pkg.sv b/src/ace_pkg.sv index cbd0462..e4297ff 100644 --- a/src/ace_pkg.sv +++ b/src/ace_pkg.sv @@ -68,6 +68,7 @@ package ace_pkg; int unsigned AxiDataWidth; int unsigned AxiUserWidth; int unsigned DcacheLineWidth; + int unsigned DCacheIndexWidth; } ccu_cfg_t; // transaction type diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 7beb6cf..6304e75 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -8,6 +8,7 @@ module ccu_ctrl import ccu_ctrl_pkg::*; #( parameter int unsigned DcacheLineWidth = 0, + parameter int unsigned DCacheIndexWidth = 0, parameter int unsigned AxiDataWidth = 0, parameter int unsigned NoMstPorts = 4, parameter int unsigned SlvAxiIDWidth = 0, @@ -44,7 +45,7 @@ import ariane_pkg::*; localparam int unsigned AxiAddrWidth = 64; localparam int unsigned DcacheLineWords = DcacheLineWidth / 
AxiDataWidth; -localparam int unsigned DCacheByteOffset = $clog2(ariane_pkg::DCACHE_LINE_WIDTH/8); +localparam int unsigned DCacheByteOffset = $clog2(DcacheLineWidth/8); localparam int unsigned MstIdxBits = $clog2(NoMstPorts); logic [SlvAxiIDWidth:0] b_inp_id; @@ -306,12 +307,12 @@ assign dec_collision = (b_exists || r_exists); assign b_exists_data = axi_pkg::aligned_addr(dec_ccu_req_holder.aw.addr,dec_ccu_req_holder.aw.size); -assign b_exists_mask = CollisionOnSetOnly ? {ariane_pkg::DCACHE_INDEX_WIDTH{1'b1}} << DCacheByteOffset +assign b_exists_mask = CollisionOnSetOnly ? {DCacheIndexWidth{1'b1}} << DCacheByteOffset : ~{DCacheByteOffset{1'b1}}; assign b_exists_req = dec_lookup_req; assign r_exists_data = axi_pkg::aligned_addr(dec_ccu_req_holder.ar.addr,dec_ccu_req_holder.ar.size); -assign r_exists_mask = CollisionOnSetOnly ? {ariane_pkg::DCACHE_INDEX_WIDTH{1'b1}} << DCacheByteOffset +assign r_exists_mask = CollisionOnSetOnly ? {DCacheIndexWidth{1'b1}} << DCacheByteOffset : ~{DCacheByteOffset{1'b1}}; assign r_exists_req = dec_lookup_req; From c332fa122b1149faeace4ae6ae4e639672ee81a0 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Tue, 23 Apr 2024 14:50:39 +0200 Subject: [PATCH 019/109] Update W and B channels handling in memory unit * Add one ID bit on the memory side * Add W FIFO to decouple Ax controller and W channel --- src/ace_ccu_top.sv | 44 +++++-- src/ccu_ctrl.sv | 123 ++++++++++-------- src/ccu_ctrl_decoder.sv | 18 +-- src/ccu_ctrl_memory_unit.sv | 251 ++++++++++++++++++------------------ src/ccu_ctrl_snoop_unit.sv | 18 +-- 5 files changed, 245 insertions(+), 209 deletions(-) diff --git a/src/ace_ccu_top.sv b/src/ace_ccu_top.sv index a413bf2..9301061 100644 --- a/src/ace_ccu_top.sv +++ b/src/ace_ccu_top.sv @@ -38,6 +38,13 @@ module ace_ccu_top parameter type mst_resp_t = logic, parameter type mst_stg_req_t = logic, parameter type mst_stg_resp_t = logic, + parameter type reqs_mux_aw_chan_t= logic, + parameter type reqs_mux_ar_chan_t= logic, + 
parameter type reqs_mux_w_chan_t = logic, + parameter type reqs_mux_r_chan_t = logic, + parameter type reqs_mux_b_chan_t = logic, + parameter type reqs_mux_req_t = logic, + parameter type reqs_mux_resp_t = logic, parameter type snoop_ac_t = logic, parameter type snoop_cr_t = logic, parameter type snoop_cd_t = logic, @@ -67,8 +74,8 @@ mst_stg_req_t [Cfg.NoSlvPorts:0] mst_reqs_tmp; slv_req_t [Cfg.NoSlvPorts-1:0] ccu_reqs_i; slv_resp_t [Cfg.NoSlvPorts-1:0] ccu_resps_o; // signals from the CCU -mst_stg_req_t ccu_reqs_mux_o; -mst_stg_resp_t ccu_resps_mux_i; +reqs_mux_req_t ccu_reqs_mux_o; +reqs_mux_resp_t ccu_resps_mux_i; mst_stg_req_t ccu_reqs_o; mst_stg_resp_t ccu_resps_i; @@ -122,7 +129,7 @@ for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_slv_port_demux end axi_mux #( - .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts+$clog2(Cfg.NoSlvPorts) ), // ID width of the slave ports + .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts+$clog2(Cfg.NoSlvPorts)+1 ), // ID width of the slave ports .slv_aw_chan_t ( mst_stg_aw_chan_t ), // AW Channel Type, slave ports .mst_aw_chan_t ( mst_aw_chan_t ), // AW Channel Type, master port .w_chan_t ( w_chan_t ), // W Channel Type, all ports @@ -187,18 +194,18 @@ end axi_mux #( .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts ), // ID width of the slave ports .slv_aw_chan_t ( slv_aw_chan_t ), // AW Channel Type, slave ports - .mst_aw_chan_t ( mst_stg_aw_chan_t ), // AW Channel Type, master port + .mst_aw_chan_t ( reqs_mux_aw_chan_t ), // AW Channel Type, master port .w_chan_t ( w_chan_t ), // W Channel Type, all ports .slv_b_chan_t ( slv_b_chan_t ), // B Channel Type, slave ports - .mst_b_chan_t ( mst_stg_b_chan_t ), // B Channel Type, master port + .mst_b_chan_t ( reqs_mux_b_chan_t ), // B Channel Type, master port .slv_ar_chan_t ( slv_ar_chan_t ), // AR Channel Type, slave ports - .mst_ar_chan_t ( mst_stg_ar_chan_t ), // AR Channel Type, master port + .mst_ar_chan_t ( reqs_mux_ar_chan_t ), // AR Channel Type, master port .slv_r_chan_t ( slv_r_chan_t ), // R 
Channel Type, slave ports - .mst_r_chan_t ( mst_stg_r_chan_t ), // R Channel Type, master port + .mst_r_chan_t ( reqs_mux_r_chan_t ), // R Channel Type, master port .slv_req_t ( slv_req_t ), .slv_resp_t ( slv_resp_t ), - .mst_req_t ( mst_stg_req_t ), - .mst_resp_t ( mst_stg_resp_t ), + .mst_req_t ( reqs_mux_req_t ), + .mst_resp_t ( reqs_mux_resp_t ), .NoSlvPorts ( Cfg.NoSlvPorts ), // Number of Masters for the modules .MaxWTrans ( Cfg.MaxMstTrans ), .FallThrough ( Cfg.FallThrough ), @@ -230,6 +237,12 @@ ccu_ctrl #( .mst_r_chan_t ( mst_stg_r_chan_t ), // R Channel Type, master port .mst_req_t ( mst_stg_req_t ), .mst_resp_t ( mst_stg_resp_t ), + .slv_aw_chan_t ( reqs_mux_aw_chan_t ), + .slv_b_chan_t ( reqs_mux_b_chan_t ), + .slv_ar_chan_t ( reqs_mux_ar_chan_t ), + .slv_r_chan_t ( reqs_mux_r_chan_t ), + .slv_req_t ( reqs_mux_req_t ), + .slv_resp_t ( reqs_mux_resp_t ), .snoop_ac_t ( snoop_ac_t ), .snoop_cr_t ( snoop_cr_t ), .snoop_cd_t ( snoop_cd_t ), @@ -265,9 +278,11 @@ module ace_ccu_top_intf AXI_BUS.Master mst_ports ); - localparam int unsigned AxiIdWidthMstPortsStage = Cfg.AxiIdWidthSlvPorts +$clog2(Cfg.NoSlvPorts); + localparam int unsigned AxiIdWidthReqsMux = Cfg.AxiIdWidthSlvPorts +$clog2(Cfg.NoSlvPorts); + localparam int unsigned AxiIdWidthMstPortsStage = AxiIdWidthReqsMux + 1; // Add one bit used by the CCU localparam int unsigned AxiIdWidthMstPorts = AxiIdWidthMstPortsStage + $clog2(Cfg.NoSlvPorts+1); + typedef logic [AxiIdWidthReqsMux -1:0] id_width_reqs_mux_t; typedef logic [AxiIdWidthMstPortsStage-1:0] id_mst_stg_t; typedef logic [AxiIdWidthMstPorts -1:0] id_mst_t; typedef logic [Cfg.AxiIdWidthSlvPorts -1:0] id_slv_t; @@ -302,6 +317,8 @@ module ace_ccu_top_intf `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) + `ACE_TYPEDEF_ALL(reqs_mux, addr_t, id_width_reqs_mux_t, data_t, strb_t, user_t) + mst_ace_req_t mst_ace_reqs; mst_ace_resp_t mst_ace_resps; @@ -349,6 +366,13 @@ module ace_ccu_top_intf 
.mst_resp_t ( mst_ace_resp_t ), .mst_stg_req_t ( mst_ace_stg_req_t ), .mst_stg_resp_t ( mst_ace_stg_resp_t ), + .reqs_mux_aw_chan_t ( reqs_mux_aw_chan_t ), + .reqs_mux_ar_chan_t ( reqs_mux_ar_chan_t ), + .reqs_mux_w_chan_t ( reqs_mux_w_chan_t ), + .reqs_mux_r_chan_t ( reqs_mux_r_chan_t ), + .reqs_mux_b_chan_t ( reqs_mux_b_chan_t ), + .reqs_mux_req_t ( reqs_mux_req_t ), + .reqs_mux_resp_t ( reqs_mux_resp_t ), .snoop_ac_t ( snoop_ac_t ), .snoop_cr_t ( snoop_cr_t ), .snoop_cd_t ( snoop_cd_t ), diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 6304e75..785f428 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -5,7 +5,7 @@ `include "ace/assign.svh" `include "ace/typedef.svh" -module ccu_ctrl import ccu_ctrl_pkg::*; +module ccu_ctrl import ccu_ctrl_pkg::*; import axi_pkg::*; #( parameter int unsigned DcacheLineWidth = 0, parameter int unsigned DCacheIndexWidth = 0, @@ -20,6 +20,12 @@ module ccu_ctrl import ccu_ctrl_pkg::*; parameter type mst_r_chan_t = logic, parameter type mst_req_t = logic, parameter type mst_resp_t = logic, + parameter type slv_aw_chan_t = logic, + parameter type slv_b_chan_t = logic, + parameter type slv_ar_chan_t = logic, + parameter type slv_r_chan_t = logic, + parameter type slv_req_t = logic, + parameter type slv_resp_t = logic, parameter type snoop_ac_t = logic, parameter type snoop_cr_t = logic, parameter type snoop_cd_t = logic, @@ -30,8 +36,8 @@ module ccu_ctrl import ccu_ctrl_pkg::*; input clk_i, input rst_ni, // CCU Request In and response out - input mst_req_t ccu_req_i, - output mst_resp_t ccu_resp_o, + input slv_req_t ccu_req_i, + output slv_resp_t ccu_resp_o, //CCU Request Out and response in output mst_req_t ccu_req_o, input mst_resp_t ccu_resp_i, @@ -40,9 +46,6 @@ module ccu_ctrl import ccu_ctrl_pkg::*; input snoop_resp_t [NoMstPorts-1:0] m2s_resp_i ); -import axi_pkg::*; -import ariane_pkg::*; - localparam int unsigned AxiAddrWidth = 64; localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth; localparam int 
unsigned DCacheByteOffset = $clog2(DcacheLineWidth/8); @@ -85,8 +88,8 @@ logic r_oup_data_valid; logic r_oup_gnt; -mst_resp_t mu_ccu_resp; -mst_req_t mu_ccu_req; +slv_resp_t mu_ccu_resp; +slv_req_t mu_ccu_req; su_op_e su_op; mu_op_e mu_op; @@ -95,7 +98,7 @@ logic su_valid, mu_valid; logic su_ready, mu_ready; -mst_req_t dec_ccu_req_holder; +slv_req_t dec_ccu_req_holder; logic dec_shared, dec_dirty; @@ -110,7 +113,7 @@ logic [NoMstPorts-1:0] cd_data_available_in, cd_data_available_out; logic [NoMstPorts-1:0] cd_last_q; logic cd_fifo_full, mu_cd_fifo_full, su_cd_fifo_full; -mst_r_chan_t su_r; +slv_r_chan_t su_r; logic su_r_valid, su_r_ready; logic ccu_ar_ready, ccu_aw_ready; @@ -126,13 +129,13 @@ ccu_ctrl_decoder #( .AxiDataWidth (AxiDataWidth), .NoMstPorts (NoMstPorts), .SlvAxiIDWidth (SlvAxiIDWidth), - .mst_aw_chan_t (mst_aw_chan_t), + .slv_aw_chan_t (slv_aw_chan_t), .w_chan_t (w_chan_t), - .mst_b_chan_t (mst_b_chan_t), - .mst_ar_chan_t (mst_ar_chan_t), - .mst_r_chan_t (mst_r_chan_t), - .mst_req_t (mst_req_t), - .mst_resp_t (mst_resp_t), + .slv_b_chan_t (slv_b_chan_t), + .slv_ar_chan_t (slv_ar_chan_t), + .slv_r_chan_t (slv_r_chan_t), + .slv_req_t (slv_req_t), + .slv_resp_t (slv_resp_t), .snoop_ac_t (snoop_ac_t), .snoop_cr_t (snoop_cr_t), .snoop_cd_t (snoop_cd_t), @@ -174,13 +177,13 @@ ccu_ctrl_snoop_unit #( .AxiDataWidth (AxiDataWidth), .NoMstPorts (NoMstPorts), .SlvAxiIDWidth (SlvAxiIDWidth), - .mst_aw_chan_t (mst_aw_chan_t), + .slv_aw_chan_t (slv_aw_chan_t), .w_chan_t (w_chan_t), - .mst_b_chan_t (mst_b_chan_t), - .mst_ar_chan_t (mst_ar_chan_t), - .mst_r_chan_t (mst_r_chan_t), - .mst_req_t (mst_req_t), - .mst_resp_t (mst_resp_t), + .slv_b_chan_t (slv_b_chan_t), + .slv_ar_chan_t (slv_ar_chan_t), + .slv_r_chan_t (slv_r_chan_t), + .slv_req_t (slv_req_t), + .slv_resp_t (slv_resp_t), .snoop_ac_t (snoop_ac_t), .snoop_cr_t (snoop_cr_t), .snoop_cd_t (snoop_cd_t), @@ -220,6 +223,12 @@ ccu_ctrl_memory_unit #( .mst_r_chan_t (mst_r_chan_t), .mst_req_t (mst_req_t), .mst_resp_t 
(mst_resp_t), + .slv_aw_chan_t (slv_aw_chan_t), + .slv_b_chan_t (slv_b_chan_t), + .slv_ar_chan_t (slv_ar_chan_t), + .slv_r_chan_t (slv_r_chan_t), + .slv_req_t (slv_req_t), + .slv_resp_t (slv_resp_t), .snoop_ac_t (snoop_ac_t), .snoop_cr_t (snoop_cr_t), .snoop_cd_t (snoop_cd_t), @@ -246,40 +255,42 @@ ccu_ctrl_memory_unit #( .first_responder_i (dec_first_responder) ); - logic [1:0] r_valid_in, r_ready_in; - mst_r_chan_t [1:0] r_chans_in; +/////////////////// +// R arbitration // +/////////////////// - mst_r_chan_t r_chan_out; - logic r_valid_out, r_ready_out; +logic [1:0] r_valid_in, r_ready_in; +slv_r_chan_t [1:0] r_chans_in; - always_comb begin - mu_ccu_req = ccu_req_i; - - r_valid_in = {mu_ccu_resp.r_valid, su_r_valid}; - r_chans_in = {mu_ccu_resp.r, su_r}; - {mu_ccu_req.r_ready, su_r_ready} = r_ready_in; - end +slv_r_chan_t r_chan_out; +logic r_valid_out, r_ready_out; - rr_arb_tree #( - .NumIn ( 2 ), - .DataType ( mst_r_chan_t ), - .AxiVldRdy( 1'b1 ), - .LockIn ( 1'b1 ) - ) r_arbiter_i ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .flush_i( 1'b0 ), - .rr_i ( '0 ), - .req_i ( r_valid_in ), - .gnt_o ( r_ready_in ), - .data_i ( r_chans_in ), - .gnt_i ( r_ready_out ), - .req_o ( r_valid_out ), - .data_o ( r_chan_out ), - .idx_o ( ) - ); +always_comb begin + mu_ccu_req = ccu_req_i; + r_valid_in = {mu_ccu_resp.r_valid, su_r_valid}; + r_chans_in = {mu_ccu_resp.r, su_r}; + {mu_ccu_req.r_ready, su_r_ready} = r_ready_in; +end +rr_arb_tree #( + .NumIn ( 2 ), + .DataType ( slv_r_chan_t ), + .AxiVldRdy( 1'b1 ), + .LockIn ( 1'b1 ) +) r_arbiter_i ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i( 1'b0 ), + .rr_i ( '0 ), + .req_i ( r_valid_in ), + .gnt_o ( r_ready_in ), + .data_i ( r_chans_in ), + .gnt_i ( r_ready_out ), + .req_o ( r_valid_out ), + .data_o ( r_chan_out ), + .idx_o ( ) +); always_comb begin // Resp @@ -300,6 +311,10 @@ for (genvar i = 0; i < NoMstPorts; i++) begin assign s2m_req_o[i].cr_ready = dec_snoop_req[i].cr_ready; end +///////////////////// +// 
Collision Check // +///////////////////// + // Exists assign dec_collision = (b_exists || r_exists); @@ -396,7 +411,9 @@ id_queue #( .oup_gnt_o (r_oup_gnt) ); -// CD arbitration +//////////////////// +// CD arbitration // +//////////////////// logic mu_wb_op, su_wb_op; @@ -466,7 +483,7 @@ assign cd_user_pop = cd_done; fifo_v3 #( .FALL_THROUGH(1), - .DATA_WIDTH(1 + 2 * NoMstPorts), + .DATA_WIDTH(1 + NoMstPorts + MstIdxBits), .DEPTH(4) ) cd_ordering_fifo_i ( .clk_i (clk_i), diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index 67637d9..882dcb6 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -4,13 +4,13 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; parameter int unsigned AxiDataWidth = 0, parameter int unsigned NoMstPorts = 4, parameter int unsigned SlvAxiIDWidth = 0, - parameter type mst_aw_chan_t = logic, + parameter type slv_aw_chan_t = logic, parameter type w_chan_t = logic, - parameter type mst_b_chan_t = logic, - parameter type mst_ar_chan_t = logic, - parameter type mst_r_chan_t = logic, - parameter type mst_req_t = logic, - parameter type mst_resp_t = logic, + parameter type slv_b_chan_t = logic, + parameter type slv_ar_chan_t = logic, + parameter type slv_r_chan_t = logic, + parameter type slv_req_t = logic, + parameter type slv_resp_t = logic, parameter type snoop_ac_t = logic, parameter type snoop_cr_t = logic, parameter type snoop_cd_t = logic, @@ -23,7 +23,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; input clk_i, input rst_ni, // CCU Request in - input mst_req_t ccu_req_i, + input slv_req_t ccu_req_i, // Snoop channel resuest and response output snoop_req_t [NoMstPorts-1:0] s2m_req_o, input snoop_resp_t [NoMstPorts-1:0] m2s_resp_i, @@ -31,7 +31,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; output logic slv_aw_ready_o, output logic slv_ar_ready_o, - output mst_req_t ccu_req_holder_o, + output slv_req_t ccu_req_holder_o, output logic su_valid_o, input logic su_ready_i, @@ -93,7 +93,7 @@ module 
ccu_ctrl_decoder import ccu_ctrl_pkg::*; end // Hold incoming ACE request - mst_req_t ccu_req_holder_q; + slv_req_t ccu_req_holder_q; always_ff @(posedge clk_i , negedge rst_ni) begin if(!rst_ni) begin diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index 30ecb54..c0e404a 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -11,6 +11,12 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; parameter type mst_r_chan_t = logic, parameter type mst_req_t = logic, parameter type mst_resp_t = logic, + parameter type slv_aw_chan_t = logic, + parameter type slv_b_chan_t = logic, + parameter type slv_ar_chan_t = logic, + parameter type slv_r_chan_t = logic, + parameter type slv_req_t = logic, + parameter type slv_resp_t = logic, parameter type snoop_ac_t = logic, parameter type snoop_cr_t = logic, parameter type snoop_cd_t = logic, @@ -23,8 +29,8 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; input clk_i, input rst_ni, // CCU Request In and response out - input mst_req_t ccu_req_i, - output mst_resp_t ccu_resp_o, + input slv_req_t ccu_req_i, + output slv_resp_t ccu_resp_o, //CCU Request Out and response in output mst_req_t ccu_req_o, input mst_resp_t ccu_resp_i, @@ -34,22 +40,25 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; output logic cd_fifo_full_o, - input mst_req_t ccu_req_holder_i, + input slv_req_t ccu_req_holder_i, output logic mu_ready_o, input logic mu_valid_i, input mu_op_e mu_op_i, input logic [MstIdxBits-1:0] first_responder_i ); -localparam FIFO_DEPTH = 2; +localparam CD_FIFO_DEPTH = 2; +localparam AXI_FIFO_DEPTH = 4; +localparam W_FIFO_DEPTH = 2; mst_req_t ccu_req_out; mst_resp_t ccu_resp_in; -mst_req_t ccu_req_holder_q; +slv_req_t ccu_req_holder_q; logic [MstIdxBits-1:0] first_responder_q; -logic fifo_push, fifo_flush, fifo_pop, fifo_full, fifo_empty; +logic cd_fifo_pop, cd_fifo_empty; +logic [AxiDataWidth-1:0] cd_fifo_data_out; always_ff @(posedge clk_i , negedge rst_ni) begin if(!rst_ni) 
begin @@ -79,13 +88,15 @@ mst_aw_chan_t aw_out; logic ar_valid_out, aw_valid_out; -logic wb_expected_en; - logic w_last_d, w_last_q; -logic [$bits(ccu_resp_in.b.id)-1:0] wb_id_q, wb_id_d; +typedef enum logic {W_PASSTHROUGH, W_FROM_FIFO} w_state_t; + +w_state_t w_state_in, w_state_out; -logic wb_expected_q; +logic w_fifo_full, w_fifo_empty; +logic w_fifo_push, w_fifo_pop; +w_state_t w_fifo_data_in, w_fifo_data_out; always_comb begin mu_ready_o = 1'b0; @@ -97,9 +108,8 @@ always_comb begin ar_valid_out = 1'b0; aw_valid_out = 1'b0; - wb_expected_en = 1'b0; - - wb_id_d = wb_id_q; + w_fifo_push = 1'b0; + w_fifo_data_in = W_PASSTHROUGH; case (ax_busy_q) 1'b0: begin @@ -119,21 +129,23 @@ always_comb begin end end SEND_AXI_REQ_WRITE_BACK_R: begin - wb_id_d = {first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}; // send writeback request - aw_valid_out = !wb_expected_q; + aw_valid_out = !w_fifo_full; aw_out = '0; //default aw_out.addr = ccu_req_holder_q.ar.addr; aw_out.addr[3:0] = 4'b0; // writeback is always full cache line aw_out.size = 2'b11; aw_out.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction - aw_out.id = {first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. for AMO operations + aw_out.id = {1'b1, first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. 
for AMO operations aw_out.len = DcacheLineWords-1; // WRITEBACK aw_out.domain = 2'b00; aw_out.snoop = 3'b011; - if (ccu_resp_in.aw_ready && !wb_expected_q) begin - wb_expected_en = 1'b1; + + w_fifo_data_in = W_FROM_FIFO; + + if (ccu_resp_in.aw_ready && !w_fifo_full) begin + w_fifo_push = 1'b1; if (ccu_req_holder_q.ar.lock) // Blocking behavior for AMO operations // TODO: check if truly needed @@ -143,39 +155,39 @@ always_comb begin end end SEND_AXI_REQ_W: begin - // This is a hotfix to avoid serving requests from the core - // with the same ID of the writeback - // TODO: add a bit to the ID to differentiate between WB issued - // by the CCU and requests forwarded from the cores - if (wb_id_q != ccu_req_holder_q.aw.id || !wb_expected_q) begin - aw_valid_out = 'b1; - aw_out = ccu_req_holder_q.aw; - if (ccu_resp_in.aw_ready) begin - if (ccu_req_holder_q.aw.atop[5]) - // Blocking behavior for AMO operations - // TODO: check if truly needed - ax_op_d = AMO_WAIT_READ; - else - ax_busy_d = 1'b0; - end + aw_valid_out = !w_fifo_full; + aw_out = ccu_req_holder_q.aw; + + w_fifo_data_in = W_PASSTHROUGH; + + if (ccu_resp_in.aw_ready && !w_fifo_full) begin + w_fifo_push = 1'b1; + if (ccu_req_holder_q.aw.atop[5]) + // Blocking behavior for AMO operations + // TODO: check if truly needed + ax_op_d = AMO_WAIT_READ; + else + ax_busy_d = 1'b0; end end SEND_AXI_REQ_WRITE_BACK_W: begin - wb_id_d = {first_responder_q, ccu_req_holder_q.aw.id[SlvAxiIDWidth-1:0]}; // send writeback request - aw_valid_out = !wb_expected_q; + aw_valid_out = !w_fifo_full; aw_out = '0; //default aw_out.addr = ccu_req_holder_q.aw.addr; aw_out.addr[3:0] = 4'b0; // writeback is always full cache line aw_out.size = 2'b11; aw_out.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction - aw_out.id = {first_responder_q, ccu_req_holder_q.aw.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. 
for AMO operations + aw_out.id = {1'b1, first_responder_q, ccu_req_holder_q.aw.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. for AMO operations aw_out.len = DcacheLineWords-1; // WRITEBACK aw_out.domain = 2'b00; aw_out.snoop = 3'b011; - if (ccu_resp_in.aw_ready && !wb_expected_q) begin - wb_expected_en = 1'b1; + + w_fifo_data_in = W_FROM_FIFO; + + if (ccu_resp_in.aw_ready && !w_fifo_full) begin + w_fifo_push = 1'b1; if (ccu_req_holder_q.aw.atop[5]) ax_op_d = AMO_WAIT_WB_W; else @@ -189,12 +201,12 @@ always_comb begin end AMO_WAIT_WB_R: begin if(ccu_resp_in.b_valid && ccu_req_out.b_ready - && ccu_resp_in.b.id == {first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}) + && ccu_resp_in.b.id == {1'b1, first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}) ax_op_d = SEND_AXI_REQ_R; end AMO_WAIT_WB_W: begin if(ccu_resp_in.b_valid && ccu_req_out.b_ready && - ccu_resp_in.b.id == {first_responder_q, ccu_req_holder_q.aw.id[SlvAxiIDWidth-1:0]}) + ccu_resp_in.b.id == {1'b1, first_responder_q, ccu_req_holder_q.aw.id[SlvAxiIDWidth-1:0]}) ax_op_d = SEND_AXI_REQ_W; end endcase @@ -202,35 +214,25 @@ always_comb begin endcase end -typedef enum logic [1:0] {W_IDLE, W_PASSTHROUGH, W_FROM_FIFO_W, W_FROM_FIFO_R} w_state_t; - -w_state_t w_state_q, w_state_d; - -logic [AxiDataWidth-1:0] fifo_data_in, fifo_data_out; -logic [$clog2(DcacheLineWords)-1:0] fifo_usage; - -assign fifo_push = cd_handshake_i; -assign fifo_flush = 1'b0; -assign fifo_data_in = cd_i.data; -assign fifo_pop = w_state_q inside {W_FROM_FIFO_W, W_FROM_FIFO_R} ? 
ccu_resp_in.w_ready && ccu_req_out.w_valid : '0; -assign cd_fifo_full_o = fifo_full; +assign cd_fifo_pop = w_fifo_data_out inside {W_FROM_FIFO} && + ccu_resp_in.w_ready && ccu_req_out.w_valid; fifo_v3 #( .FALL_THROUGH(0), .DATA_WIDTH(AxiDataWidth), - .DEPTH(FIFO_DEPTH) + .DEPTH(CD_FIFO_DEPTH) ) cd_memory_fifo_i ( .clk_i (clk_i), .rst_ni (rst_ni), - .flush_i (fifo_flush), + .flush_i (1'b0), .testmode_i (1'b0), - .full_o (fifo_full), - .empty_o (fifo_empty), - .usage_o (fifo_usage), - .data_i (fifo_data_in), - .push_i (fifo_push), - .data_o (fifo_data_out), - .pop_i (fifo_pop) + .full_o (cd_fifo_full_o), + .empty_o (cd_fifo_empty), + .usage_o (), + .data_i (cd_i.data), + .push_i (cd_handshake_i), + .data_o (cd_fifo_data_out), + .pop_i (cd_fifo_pop) ); // AR @@ -250,95 +252,88 @@ assign ccu_req_out.r_ready = ccu_req_i.r_ready; always_ff @(posedge clk_i or negedge rst_ni) begin if(!rst_ni) begin - w_state_q <= W_IDLE; w_last_q <= 1'b0; end else begin - w_state_q <= w_state_d; w_last_q <= w_last_d; end end -always_ff @(posedge clk_i or negedge rst_ni) begin - if(!rst_ni) begin - wb_expected_q <= 1'b0; - wb_id_q <= '0; - end else if(ccu_resp_in.b_valid && - ccu_req_out.b_ready && - ccu_resp_in.b.id == wb_id_q) begin - wb_expected_q <= 1'b0; - wb_id_q <= '0; - end else if(wb_expected_en) begin - wb_expected_q <= 1'b1; - wb_id_q <= wb_id_d; - end -end +logic w_fifo_data_in_temp, w_fifo_data_out_temp; +assign w_fifo_data_in_temp = logic'(w_fifo_data_in); +assign w_fifo_data_out = w_state_t'(w_fifo_data_out_temp); +fifo_v3 #( + .FALL_THROUGH(0), + .DATA_WIDTH($bits(w_state_t)), + .DEPTH(W_FIFO_DEPTH) + ) w_fifo_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i (1'b0), + .full_o (w_fifo_full), + .empty_o (w_fifo_empty), + .usage_o (), + .data_i (w_fifo_data_in_temp), + .push_i (w_fifo_push), + .data_o (w_fifo_data_out_temp), + .pop_i (w_fifo_pop) +); -always_comb begin - w_last_d = w_last_q; - w_state_d = w_state_q; + +always_comb begin ccu_req_out.w 
= ccu_req_i.w; ccu_req_out.w_valid = 1'b0; ccu_resp_o.w_ready = 1'b0; - case (w_state_q) - W_IDLE: begin - w_last_d = 1'b0; - if (ax_busy_q && ccu_req_out.aw_valid) begin - case (ax_op_q) - SEND_AXI_REQ_WRITE_BACK_W: begin - w_state_d = W_FROM_FIFO_W; - end - SEND_AXI_REQ_WRITE_BACK_R: - w_state_d = W_FROM_FIFO_R; - SEND_AXI_REQ_W: begin - w_state_d = W_PASSTHROUGH; - end - default: - w_state_d = W_IDLE; - endcase - end - end - W_PASSTHROUGH: begin - ccu_req_out.w_valid = ccu_req_i.w_valid; - ccu_resp_o.w_ready = ccu_resp_in.w_ready; + w_fifo_pop = 1'b0; - if(ccu_resp_in.w_ready && ccu_req_i.w_valid && ccu_req_i.w.last) - w_state_d = W_IDLE; - end - W_FROM_FIFO_R, W_FROM_FIFO_W: begin - // Connect the FIFO as long as the transmission is ongoing - w_last_d = ccu_resp_in.w_ready && !fifo_empty; - ccu_req_out.w_valid = !fifo_empty; - ccu_req_out.w.strb = '1; - ccu_req_out.w.data = fifo_data_out; - ccu_req_out.w.last = w_last_q; - - if(ccu_resp_in.w_ready && !fifo_empty && w_last_q) - if (w_state_q == W_FROM_FIFO_W) begin - // This checks is just to ensure that the cores have visibility - // on the W channel only when we actually want to write something - // Removing it would cause a premature forwarding of a W req - // TODO: make this less convoluted - w_state_d = ax_busy_q && ax_op_q == AMO_WAIT_WB_W ? 
W_IDLE : W_PASSTHROUGH; - end else begin - w_state_d = W_IDLE; + w_last_d = w_last_q; + + if (!w_fifo_empty) begin + case (w_fifo_data_out) + W_PASSTHROUGH: begin + ccu_req_out.w_valid = ccu_req_i.w_valid; + ccu_resp_o.w_ready = ccu_resp_in.w_ready; + + if(ccu_resp_in.w_ready && ccu_req_i.w_valid && ccu_req_i.w.last) + w_fifo_pop = 1'b1; + end + W_FROM_FIFO: begin + // Connect the FIFO as long as the transmission is ongoing + w_last_d = ccu_resp_in.w_ready && !cd_fifo_empty; + ccu_req_out.w_valid = !cd_fifo_empty; + ccu_req_out.w.strb = '1; + ccu_req_out.w.data = cd_fifo_data_out; + ccu_req_out.w.last = w_last_q; + + if(ccu_resp_in.w_ready && !cd_fifo_empty && w_last_q) begin + w_last_d = 1'b0; + w_fifo_pop = 1'b1; end - end - endcase + end + endcase + end end -assign ccu_resp_o.b = ccu_resp_in.b; +assign ccu_resp_o.b = ccu_resp_in.b; + +// An additional bit in the ID is used to verify whether the CCU +// issued the request or simply forwarded one from the core +logic is_wb_resp; +assign is_wb_resp = (ccu_resp_in.b.id[SlvAxiIDWidth+$clog2(NoMstPorts)] == 1'b1); always_comb begin ccu_req_out.b_ready = 1'b0; ccu_resp_o.b_valid = 1'b0; - if (wb_expected_q && ccu_resp_in.b.id == wb_id_q) begin + if (is_wb_resp) begin + // Response to a WB issued by the CCU ccu_req_out.b_ready = 'b1; end else begin + // Response to a core request ccu_req_out.b_ready = ccu_req_i.b_ready; ccu_resp_o.b_valid = ccu_resp_in.b_valid; end @@ -346,7 +341,7 @@ end axi_fifo #( - .Depth (4), + .Depth (AXI_FIFO_DEPTH), .aw_chan_t (mst_aw_chan_t), .w_chan_t (w_chan_t), .b_chan_t (mst_b_chan_t), @@ -359,10 +354,10 @@ axi_fifo #( .rst_ni, .test_i (1'b0), // slave port - .slv_req_i (ccu_req_out), + .slv_req_i (ccu_req_out), .slv_resp_o (ccu_resp_in), // master port - .mst_req_o (ccu_req_o), + .mst_req_o (ccu_req_o), .mst_resp_i (ccu_resp_i) ); diff --git a/src/ccu_ctrl_snoop_unit.sv b/src/ccu_ctrl_snoop_unit.sv index 0d6ea6c..1d2bfdd 100644 --- a/src/ccu_ctrl_snoop_unit.sv +++ 
b/src/ccu_ctrl_snoop_unit.sv @@ -4,13 +4,13 @@ module ccu_ctrl_snoop_unit import ccu_ctrl_pkg::*; parameter int unsigned AxiDataWidth = 0, parameter int unsigned NoMstPorts = 4, parameter int unsigned SlvAxiIDWidth = 0, - parameter type mst_aw_chan_t = logic, + parameter type slv_aw_chan_t = logic, parameter type w_chan_t = logic, - parameter type mst_b_chan_t = logic, - parameter type mst_ar_chan_t = logic, - parameter type mst_r_chan_t = logic, - parameter type mst_req_t = logic, - parameter type mst_resp_t = logic, + parameter type slv_b_chan_t = logic, + parameter type slv_ar_chan_t = logic, + parameter type slv_r_chan_t = logic, + parameter type slv_req_t = logic, + parameter type slv_resp_t = logic, parameter type snoop_ac_t = logic, parameter type snoop_cr_t = logic, parameter type snoop_cd_t = logic, @@ -23,7 +23,7 @@ module ccu_ctrl_snoop_unit import ccu_ctrl_pkg::*; input clk_i, input rst_ni, // CCU Request In and response out - output mst_r_chan_t r_o, + output slv_r_chan_t r_o, output logic r_valid_o, input logic r_ready_i, @@ -31,7 +31,7 @@ module ccu_ctrl_snoop_unit import ccu_ctrl_pkg::*; input logic cd_handshake_i, output logic cd_fifo_full_o, - input mst_req_t ccu_req_holder_i, + input slv_req_t ccu_req_holder_i, output logic su_ready_o, input logic su_valid_i, input su_op_e su_op_i, @@ -54,7 +54,7 @@ logic [$clog2(DcacheLineWords)-1:0] fifo_usage; logic sample_dec_data; -mst_req_t ccu_req_holder_q; +slv_req_t ccu_req_holder_q; logic shared_q; logic dirty_q; From 63d2e6fb0d247040db40579c3f0d0a49e2c5b2a5 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Tue, 23 Apr 2024 19:45:56 +0200 Subject: [PATCH 020/109] Save only the relevant part of the address in ID queues --- src/ccu_ctrl.sv | 109 ++++++++++++++++++++---------------- src/ccu_ctrl_memory_unit.sv | 4 +- 2 files changed, 63 insertions(+), 50 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 785f428..381041b 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -51,41 
+51,47 @@ localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth; localparam int unsigned DCacheByteOffset = $clog2(DcacheLineWidth/8); localparam int unsigned MstIdxBits = $clog2(NoMstPorts); -logic [SlvAxiIDWidth:0] b_inp_id; -logic [AxiAddrWidth-1:0] b_inp_data; -logic b_inp_req; -logic b_inp_gnt; - -logic [AxiAddrWidth-1:0] b_exists_data; -logic [AxiAddrWidth-1:0] b_exists_mask; -logic b_exists_req; -logic b_exists; -logic b_exists_gnt; - -logic [SlvAxiIDWidth:0] b_oup_id; -logic b_oup_pop; -logic b_oup_req; -logic [AxiAddrWidth-1:0] b_oup_data; -logic b_oup_data_valid; -logic b_oup_gnt; - -logic [SlvAxiIDWidth :0] r_inp_id; -logic [AxiAddrWidth-1:0] r_inp_data; -logic r_inp_req; -logic r_inp_gnt; - -logic [AxiAddrWidth-1:0] r_exists_data; -logic [AxiAddrWidth-1:0] r_exists_mask; -logic r_exists_req; -logic r_exists; -logic r_exists_gnt; - -logic [SlvAxiIDWidth:0] r_oup_id; -logic r_oup_pop; -logic r_oup_req; -logic [AxiAddrWidth-1:0] r_oup_data; -logic r_oup_data_valid; -logic r_oup_gnt; +localparam int unsigned IdQueueDataWidth = CollisionOnSetOnly ? 
+ DCacheIndexWidth : + AxiAddrWidth - DCacheByteOffset; + +typedef logic [IdQueueDataWidth-1:0] id_queue_data_t; + +logic [SlvAxiIDWidth:0] b_inp_id; +id_queue_data_t b_inp_data; +logic b_inp_req; +logic b_inp_gnt; + +id_queue_data_t b_exists_data; +id_queue_data_t b_exists_mask; +logic b_exists_req; +logic b_exists; +logic b_exists_gnt; + +logic [SlvAxiIDWidth:0] b_oup_id; +logic b_oup_pop; +logic b_oup_req; +id_queue_data_t b_oup_data; +logic b_oup_data_valid; +logic b_oup_gnt; + +logic [SlvAxiIDWidth:0] r_inp_id; +id_queue_data_t r_inp_data; +logic r_inp_req; +logic r_inp_gnt; + +id_queue_data_t r_exists_data; +id_queue_data_t r_exists_mask; +logic r_exists_req; +logic r_exists; +logic r_exists_gnt; + +logic [SlvAxiIDWidth:0] r_oup_id; +logic r_oup_pop; +logic r_oup_req; +id_queue_data_t r_oup_data; +logic r_oup_data_valid; +logic r_oup_gnt; slv_resp_t mu_ccu_resp; @@ -315,20 +321,32 @@ end // Collision Check // ///////////////////// +localparam logic [AxiAddrWidth-1:0] EXISTS_MASK = CollisionOnSetOnly ? + {DCacheIndexWidth{1'b1}} << DCacheByteOffset : + ~{DCacheByteOffset{1'b1}}; + +logic [AxiAddrWidth-1:0] b_inp_aligned_addr; +logic [AxiAddrWidth-1:0] b_exists_aligned_addr; +logic [AxiAddrWidth-1:0] r_inp_aligned_addr; +logic [AxiAddrWidth-1:0] r_exists_aligned_addr; + +assign b_inp_aligned_addr = axi_pkg::aligned_addr(ccu_req_i.aw.addr,ccu_req_i.aw.size); +assign b_exists_aligned_addr = axi_pkg::aligned_addr(dec_ccu_req_holder.aw.addr,dec_ccu_req_holder.aw.size); + +assign r_inp_aligned_addr = axi_pkg::aligned_addr(ccu_req_i.ar.addr,ccu_req_i.ar.size); +assign r_exists_aligned_addr = axi_pkg::aligned_addr(dec_ccu_req_holder.ar.addr,dec_ccu_req_holder.ar.size); + // Exists assign dec_collision = (b_exists || r_exists); // _gnt is not used as it is combinationally set when req = 1 - -assign b_exists_data = axi_pkg::aligned_addr(dec_ccu_req_holder.aw.addr,dec_ccu_req_holder.aw.size); -assign b_exists_mask = CollisionOnSetOnly ? 
{DCacheIndexWidth{1'b1}} << DCacheByteOffset - : ~{DCacheByteOffset{1'b1}}; +assign b_exists_data = b_exists_aligned_addr[DCacheByteOffset+:IdQueueDataWidth]; +assign b_exists_mask = '1; assign b_exists_req = dec_lookup_req; -assign r_exists_data = axi_pkg::aligned_addr(dec_ccu_req_holder.ar.addr,dec_ccu_req_holder.ar.size); -assign r_exists_mask = CollisionOnSetOnly ? {DCacheIndexWidth{1'b1}} << DCacheByteOffset - : ~{DCacheByteOffset{1'b1}}; +assign r_exists_data = r_exists_aligned_addr[DCacheByteOffset+:IdQueueDataWidth]; +assign r_exists_mask = '1; assign r_exists_req = dec_lookup_req; // Oup @@ -345,16 +363,13 @@ assign r_oup_req = ccu_resp_o.r_valid && ccu_req_i.r_ready && ccu_resp_o.r.last; // Inp assign b_inp_id = ccu_req_i.aw.id; -assign b_inp_data = axi_pkg::aligned_addr(ccu_req_i.aw.addr,ccu_req_i.aw.size); +assign b_inp_data = b_inp_aligned_addr[DCacheByteOffset+:IdQueueDataWidth]; assign b_inp_req = ccu_req_i.aw_valid && ccu_resp_o.aw_ready; assign r_inp_id = ccu_req_i.ar.id; -assign r_inp_data = axi_pkg::aligned_addr(ccu_req_i.ar.addr,ccu_req_i.ar.size); +assign r_inp_data = r_inp_aligned_addr[DCacheByteOffset+:IdQueueDataWidth]; assign r_inp_req = ccu_req_i.ar_valid && ccu_resp_o.ar_ready; - -typedef logic [AxiAddrWidth-1:0] id_queue_data_t; - id_queue #( .ID_WIDTH (SlvAxiIDWidth+1), .CAPACITY (4), diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index c0e404a..5767b15 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -92,8 +92,6 @@ logic w_last_d, w_last_q; typedef enum logic {W_PASSTHROUGH, W_FROM_FIFO} w_state_t; -w_state_t w_state_in, w_state_out; - logic w_fifo_full, w_fifo_empty; logic w_fifo_push, w_fifo_pop; w_state_t w_fifo_data_in, w_fifo_data_out; @@ -214,7 +212,7 @@ always_comb begin endcase end -assign cd_fifo_pop = w_fifo_data_out inside {W_FROM_FIFO} && +assign cd_fifo_pop = w_fifo_data_out == W_FROM_FIFO && ccu_resp_in.w_ready && ccu_req_out.w_valid; fifo_v3 #( From 
54019fec421a37ff6fb796ae7348be11c1403f3c Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 24 Apr 2024 11:31:43 +0200 Subject: [PATCH 021/109] Fix collision check lookup address --- src/ccu_ctrl.sv | 15 +++++++++------ src/ccu_ctrl_decoder.sv | 18 ++++++++++++++---- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 381041b..27420c6 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -126,13 +126,15 @@ logic ccu_ar_ready, ccu_aw_ready; snoop_req_t [NoMstPorts-1:0] dec_snoop_req; -logic dec_lookup_req, dec_collision; +logic dec_lookup_req; +logic [AxiAddrWidth-1:0] dec_lookup_addr; logic dec_cd_fifo_stall; ccu_ctrl_decoder #( .DcacheLineWidth (DcacheLineWidth), .AxiDataWidth (AxiDataWidth), + .AxiAddrWidth (AxiAddrWidth), .NoMstPorts (NoMstPorts), .SlvAxiIDWidth (SlvAxiIDWidth), .slv_aw_chan_t (slv_aw_chan_t), @@ -172,10 +174,12 @@ ccu_ctrl_decoder #( .first_responder_o (dec_first_responder), .lookup_req_o (dec_lookup_req), - .collision_i (dec_collision), + .lookup_addr_o (dec_lookup_addr), .cd_fifo_stall_i (dec_cd_fifo_stall), .b_queue_full_i (~b_inp_gnt), - .r_queue_full_i (~r_inp_gnt) + .r_queue_full_i (~r_inp_gnt), + .b_collision_i (b_exists), + .r_collision_i (r_exists) ); ccu_ctrl_snoop_unit #( @@ -331,13 +335,12 @@ logic [AxiAddrWidth-1:0] r_inp_aligned_addr; logic [AxiAddrWidth-1:0] r_exists_aligned_addr; assign b_inp_aligned_addr = axi_pkg::aligned_addr(ccu_req_i.aw.addr,ccu_req_i.aw.size); -assign b_exists_aligned_addr = axi_pkg::aligned_addr(dec_ccu_req_holder.aw.addr,dec_ccu_req_holder.aw.size); +assign b_exists_aligned_addr = dec_lookup_addr; assign r_inp_aligned_addr = axi_pkg::aligned_addr(ccu_req_i.ar.addr,ccu_req_i.ar.size); -assign r_exists_aligned_addr = axi_pkg::aligned_addr(dec_ccu_req_holder.ar.addr,dec_ccu_req_holder.ar.size); +assign r_exists_aligned_addr = dec_lookup_addr; // Exists -assign dec_collision = (b_exists || r_exists); // _gnt is not used as it is combinationally set 
when req = 1 diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index 882dcb6..2ccd6d6 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -2,6 +2,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; #( parameter int unsigned DcacheLineWidth = 0, parameter int unsigned AxiDataWidth = 0, + parameter int unsigned AxiAddrWidth = 0, parameter int unsigned NoMstPorts = 4, parameter int unsigned SlvAxiIDWidth = 0, parameter type slv_aw_chan_t = logic, @@ -46,9 +47,11 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; output logic [MstIdxBits-1:0] first_responder_o, output logic lookup_req_o, - input logic collision_i, + output logic [AxiAddrWidth-1:0] lookup_addr_o, input logic b_queue_full_i, input logic r_queue_full_i, + input logic b_collision_i, + input logic r_collision_i, input logic cd_fifo_stall_i ); @@ -57,6 +60,10 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; logic [NoMstPorts-1:0] ac_handshake_q, ac_handshake; logic [NoMstPorts-1:0] cr_handshake_q, cr_handshake; + logic collision; + + assign collision = b_collision_i || r_collision_i; + enum { IDLE, DECODE_R, @@ -224,7 +231,8 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; decode_r = 1'b0; decode_w = 1'b0; - lookup_req_o = 1'b0; + lookup_req_o = 1'b0; + lookup_addr_o = axi_pkg::aligned_addr(ccu_req_holder_q.ar.addr,ccu_req_holder_q.ar.size); case (state_q) IDLE: begin @@ -247,7 +255,8 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; DECODE_W: begin lookup_req_o = 1'b1; - if (!collision_i && !b_queue_full_i && !cd_fifo_stall_i) begin + lookup_addr_o = axi_pkg::aligned_addr(ccu_req_holder_q.aw.addr,ccu_req_holder_q.aw.size); + if (!collision && !b_queue_full_i && !cd_fifo_stall_i) begin state_d = SEND_INVALID_W; slv_aw_ready_o = 1'b1; end @@ -255,7 +264,8 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; DECODE_R: begin lookup_req_o = 1'b1; - if (!collision_i && !r_queue_full_i && !cd_fifo_stall_i) begin + lookup_addr_o = 
axi_pkg::aligned_addr(ccu_req_holder_q.ar.addr,ccu_req_holder_q.ar.size); + if (!collision && !r_queue_full_i && !cd_fifo_stall_i) begin state_d = send_invalid_r ? SEND_INVALID_R : SEND_READ; slv_ar_ready_o = 1'b1; end From 15c50ad478525959f1f044d559e3e61502bf7634 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 26 Apr 2024 12:00:27 +0200 Subject: [PATCH 022/109] Decouple AC reqs and CR resps --- src/ccu_ctrl_decoder.sv | 315 +++++++++++++++++++++++++----------- src/ccu_ctrl_memory_unit.sv | 15 +- 2 files changed, 226 insertions(+), 104 deletions(-) diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index 2ccd6d6..6f86ae8 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -56,10 +56,30 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; input logic cd_fifo_stall_i ); - logic [NoMstPorts-1:0] initiator_d, initiator_q; + typedef enum logic [1:0] { INVALID_W, INVALID_R, RESP_R } cr_cmd_fifo_t; + + logic [NoMstPorts-1:0] ac_initiator; logic [NoMstPorts-1:0] ac_handshake_q, ac_handshake; + + logic [NoMstPorts-1:0] cr_aw_initiator, cr_ar_initiator; + logic [NoMstPorts-1:0] cr_aw_mask, cr_ar_mask; logic [NoMstPorts-1:0] cr_handshake_q, cr_handshake; + // AW FIFO + logic aw_fifo_empty, aw_fifo_full; + logic aw_fifo_pop, aw_fifo_push; + slv_aw_chan_t aw_fifo_in, aw_fifo_out; + + // AR FIFO + logic ar_fifo_empty, ar_fifo_full; + logic ar_fifo_pop, ar_fifo_push; + slv_ar_chan_t ar_fifo_in, ar_fifo_out; + + // CR CMD FIFO + logic cr_cmd_fifo_empty, cr_cmd_fifo_full; + logic cr_cmd_fifo_pop, cr_cmd_fifo_push; + cr_cmd_fifo_t cr_cmd_fifo_in, cr_cmd_fifo_out; + logic collision; assign collision = b_collision_i || r_collision_i; @@ -70,10 +90,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; DECODE_W, SEND_READ, SEND_INVALID_R, - SEND_INVALID_W, - WAIT_RESP_R, - WAIT_INVALID_R, - WAIT_INVALID_W + SEND_INVALID_W } state_d, state_q; typedef struct packed { @@ -99,6 +116,12 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; assign 
cr_handshake[i] = m2s_resp_i[i].cr_valid & s2m_req_o[i].cr_ready; end + logic cr_done; + + snoop_ac_t [NoMstPorts-1:0] ac; + logic [NoMstPorts-1:0] ac_valid; + logic [NoMstPorts-1:0] cr_ready; + // Hold incoming ACE request slv_req_t ccu_req_holder_q; @@ -115,15 +138,13 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; end end - assign ccu_req_holder_o = ccu_req_holder_q; - // Hold snoop AC handshakes for (genvar i = 0; i < NoMstPorts; i = i + 1) begin always_ff @ (posedge clk_i, negedge rst_ni) begin if(!rst_ni) begin ac_handshake_q[i] <= '0; end else if(state_q inside {DECODE_R, DECODE_W}) begin - ac_handshake_q[i] <= initiator_d[i]; + ac_handshake_q[i] <= ac_initiator[i]; end else if(state_q inside {SEND_READ, SEND_INVALID_R, SEND_INVALID_W}) begin if (ac_handshake[i]) ac_handshake_q[i] <= 1'b1; @@ -142,14 +163,12 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; shared_q <= '0; dirty_q <= '0; response_error_q <= '0; - end else if(state_q == IDLE) begin + end else if(cr_done) begin cr_handshake_q <= '0; data_available_q <= '0; shared_q <= '0; dirty_q <= '0; response_error_q <= '0; - end else if(state_q inside {SEND_READ, SEND_INVALID_R, SEND_INVALID_W}) begin - cr_handshake_q <= initiator_q; end else begin for (int i = 0; i < NoMstPorts; i = i + 1) begin if(cr_handshake[i]) begin @@ -174,7 +193,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; if(!rst_ni) begin first_responder_q <= '0; snoop_resp_found_q <= 1'b0; - end else if(state_q == IDLE) begin + end else if(cr_done) begin first_responder_q <= '0; snoop_resp_found_q <= 1'b0; end else if (!snoop_resp_found_q) begin @@ -196,11 +215,9 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; always_ff @(posedge clk_i, negedge rst_ni) begin : ccu_present_state if(!rst_ni) begin state_q <= IDLE; - initiator_q <= '0; prio_q <= '0; end else begin state_q <= state_d; - initiator_q <= initiator_d; prio_q <= prio_d; end end @@ -211,22 +228,16 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; always_comb begin + ac = 
'0; + ac_valid = '0; + // Next state state_d = state_q; - initiator_d = initiator_q; prio_d = prio_q; - // Output - s2m_req_o = '0; - slv_ar_ready_o = '0; slv_aw_ready_o = '0; - su_valid_o = 1'b0; - mu_valid_o = 1'b0; - su_op_o = READ_SNP_DATA; - mu_op_o = SEND_AXI_REQ_R; - // Ctrl flags decode_r = 1'b0; decode_w = 1'b0; @@ -234,28 +245,36 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; lookup_req_o = 1'b0; lookup_addr_o = axi_pkg::aligned_addr(ccu_req_holder_q.ar.addr,ccu_req_holder_q.ar.size); + cr_cmd_fifo_in = RESP_R; + aw_fifo_push = 1'b0; + ar_fifo_push = 1'b0; + + ac_initiator = '0; + case (state_q) IDLE: begin - initiator_d = '0; prio_d = '0; // wait for incoming valid request from master if(ccu_req_i.ar_valid & prio_r) begin decode_r = 1'b1; state_d = DECODE_R; - initiator_d[ccu_req_i.ar.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; prio_d.waiting_w = ccu_req_i.aw_valid; end else if(ccu_req_i.aw_valid & prio_w) begin decode_w = 1'b1; state_d = DECODE_W; - initiator_d[ccu_req_i.aw.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; prio_d.waiting_r = ccu_req_i.ar_valid; end end DECODE_W: begin + // AC initiator + ac_initiator = '0; + ac_initiator[ccu_req_i.aw.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; + // Collision lookup lookup_req_o = 1'b1; lookup_addr_o = axi_pkg::aligned_addr(ccu_req_holder_q.aw.addr,ccu_req_holder_q.aw.size); + // Stall or accept request if (!collision && !b_queue_full_i && !cd_fifo_stall_i) begin state_d = SEND_INVALID_W; slv_aw_ready_o = 1'b1; @@ -263,8 +282,13 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; end DECODE_R: begin + // AC initiator + ac_initiator = '0; + ac_initiator[ccu_req_i.ar.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; + // Collision lookup lookup_req_o = 1'b1; lookup_addr_o = axi_pkg::aligned_addr(ccu_req_holder_q.ar.addr,ccu_req_holder_q.ar.size); + // Stall or accept request if (!collision && !r_queue_full_i && !cd_fifo_stall_i) begin state_d = send_invalid_r ? 
SEND_INVALID_R : SEND_READ; slv_ar_ready_o = 1'b1; @@ -272,115 +296,218 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; end SEND_READ: begin + cr_cmd_fifo_in = RESP_R; // wait for all snoop masters to perform an handshake - if (ac_handshake_q == '1) begin - state_d = WAIT_RESP_R; + if (ac_handshake_q == '1 && !cr_cmd_fifo_full && !ar_fifo_full) begin + state_d = IDLE; + ar_fifo_push = 1'b1; end // send request to snooping masters for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - s2m_req_o[n].ac.addr = ccu_req_holder_q.ar.addr; - s2m_req_o[n].ac.prot = ccu_req_holder_q.ar.prot; - s2m_req_o[n].ac.snoop = ccu_req_holder_q.ar.snoop; - s2m_req_o[n].ac_valid = !ac_handshake_q[n]; + ac[n].addr = ccu_req_holder_q.ar.addr; + ac[n].prot = ccu_req_holder_q.ar.prot; + ac[n].snoop = ccu_req_holder_q.ar.snoop; + ac_valid[n] = !ac_handshake_q[n]; end end SEND_INVALID_R: begin + cr_cmd_fifo_in = INVALID_R; // wait for all snoop masters to perform an handshake - if (ac_handshake_q == '1) begin - state_d = WAIT_INVALID_R; + if (ac_handshake_q == '1 && !cr_cmd_fifo_full && !ar_fifo_full) begin + state_d = IDLE; + ar_fifo_push = 1'b1; end // send request to snooping masters for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - s2m_req_o[n].ac.addr = ccu_req_holder_q.ar.addr; - s2m_req_o[n].ac.prot = ccu_req_holder_q.ar.prot; - s2m_req_o[n].ac.snoop = snoop_pkg::CLEAN_INVALID; - s2m_req_o[n].ac_valid = !ac_handshake_q[n]; + ac[n].addr = ccu_req_holder_q.ar.addr; + ac[n].prot = ccu_req_holder_q.ar.prot; + ac[n].snoop = snoop_pkg::CLEAN_INVALID; + ac_valid[n] = !ac_handshake_q[n]; end end SEND_INVALID_W: begin + cr_cmd_fifo_in = INVALID_W; // wait for all snoop masters to perform an handshake - if (ac_handshake_q == '1) begin - state_d = WAIT_INVALID_W; + if (ac_handshake_q == '1 && !cr_cmd_fifo_full && !aw_fifo_full) begin + state_d = IDLE; + aw_fifo_push = 1'b1; end // send request to snooping masters for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - 
s2m_req_o[n].ac.addr = ccu_req_holder_q.aw.addr; - s2m_req_o[n].ac.prot = ccu_req_holder_q.aw.prot; - s2m_req_o[n].ac.snoop = snoop_pkg::CLEAN_INVALID; - s2m_req_o[n].ac_valid = !ac_handshake_q[n]; + ac[n].addr = ccu_req_holder_q.aw.addr; + ac[n].prot = ccu_req_holder_q.aw.prot; + ac[n].snoop = snoop_pkg::CLEAN_INVALID; + ac_valid[n] = !ac_handshake_q[n]; end end + endcase + end - WAIT_RESP_R: begin - // wait for all CR handshakes - if (cr_handshake_q == '1) begin + assign cr_aw_initiator = 1 << aw_fifo_out.id[SlvAxiIDWidth+:MstIdxBits]; + assign cr_ar_initiator = 1 << ar_fifo_out.id[SlvAxiIDWidth+:MstIdxBits]; + assign cr_aw_mask = cr_aw_initiator | cr_handshake_q; + assign cr_ar_mask = cr_ar_initiator | cr_handshake_q; - if(|(data_available_q & ~response_error_q)) begin - su_op_o = READ_SNP_DATA; - su_valid_o = 1'b1; - if (su_ready_i) begin - state_d = IDLE; - end - end else begin - mu_op_o = SEND_AXI_REQ_R; - mu_valid_o = 1'b1; - if (mu_ready_i) begin - state_d = IDLE; + assign cr_done = (mu_valid_o && mu_ready_i) || (su_valid_o && su_ready_i); + + always_comb begin + + su_valid_o = 1'b0; + mu_valid_o = 1'b0; + su_op_o = READ_SNP_DATA; + mu_op_o = SEND_AXI_REQ_R; + + aw_fifo_pop = '0; + ar_fifo_pop = '0; + + cr_ready = '0; + + if (!cr_cmd_fifo_empty) begin + case (cr_cmd_fifo_out) + + RESP_R: begin + // wait for all CR handshakes + if (cr_ar_mask == '1) begin + + if(|(data_available_q & ~response_error_q)) begin + su_op_o = READ_SNP_DATA; + su_valid_o = 1'b1; + if (su_ready_i) begin + ar_fifo_pop = 1'b1; + end + end else begin + mu_op_o = SEND_AXI_REQ_R; + mu_valid_o = 1'b1; + if (mu_ready_i) begin + ar_fifo_pop = 1'b1; + end end end + + for (int unsigned n = 0; n < NoMstPorts; n = n + 1) + cr_ready[n] = !cr_ar_mask[n]; end - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - s2m_req_o[n].cr_ready = !cr_handshake_q[n]; - end + INVALID_R: begin + // wait for all CR handshakes + if (cr_ar_mask == '1) begin - WAIT_INVALID_R: begin - // wait for all CR handshakes 
- if (cr_handshake_q == '1 && (ccu_req_i.r_ready || ccu_req_holder_q.ar.lock)) begin + if (mu_ready_i && (ar_fifo_out.lock || su_ready_i)) begin + ar_fifo_pop = 1'b1; + su_valid_o = !ar_fifo_out.lock; + end - if (mu_ready_i && (ccu_req_holder_q.ar.lock || su_ready_i)) begin - state_d = IDLE; - su_valid_o = !ccu_req_holder_q.ar.lock; + if(|(data_available_q & ~response_error_q)) begin + mu_op_o = SEND_AXI_REQ_WRITE_BACK_R; + mu_valid_o = 1'b1; + end else if (ar_fifo_out.lock) begin + mu_op_o = SEND_AXI_REQ_R; + mu_valid_o = 1'b1; + end end - if(|(data_available_q & ~response_error_q)) begin - mu_op_o = SEND_AXI_REQ_WRITE_BACK_R; - mu_valid_o = 1'b1; - end else if (ccu_req_holder_q.ar.lock) begin - mu_op_o = SEND_AXI_REQ_R; - mu_valid_o = 1'b1; - end - end + su_op_o = SEND_INVALID_ACK_R; - su_op_o = SEND_INVALID_ACK_R; + for (int unsigned n = 0; n < NoMstPorts; n = n + 1) + cr_ready[n] = !cr_ar_mask[n]; + end - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - s2m_req_o[n].cr_ready = !cr_handshake_q[n]; - end + INVALID_W: begin + // wait for all CR handshakes + if (cr_aw_mask == '1) begin - WAIT_INVALID_W: begin - // wait for all CR handshakes - if (cr_handshake_q == '1) begin + mu_valid_o = 1'b1; - mu_valid_o = 1'b1; + if (mu_ready_i) begin + aw_fifo_pop = 1'b1; + end - if (mu_ready_i) begin - state_d = IDLE; + if(|(data_available_q & ~response_error_q)) begin + mu_op_o = SEND_AXI_REQ_WRITE_BACK_W; + end else begin + mu_op_o = SEND_AXI_REQ_W; + end end - if(|(data_available_q & ~response_error_q)) begin - mu_op_o = SEND_AXI_REQ_WRITE_BACK_W; - end else begin - mu_op_o = SEND_AXI_REQ_W; - end + for (int unsigned n = 0; n < NoMstPorts; n = n + 1) + cr_ready[n] = !cr_aw_mask[n]; end + endcase + end + end - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - s2m_req_o[n].cr_ready = !cr_handshake_q[n]; - end - endcase + always_comb begin + s2m_req_o = '0; + for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin + s2m_req_o[n].ac = ac[n]; + s2m_req_o[n].ac_valid 
= ac_valid[n]; + s2m_req_o[n].cr_ready = cr_ready[n]; + end end + assign cr_cmd_fifo_push = aw_fifo_push || ar_fifo_push; + assign cr_cmd_fifo_pop = aw_fifo_pop || ar_fifo_pop; + + fifo_v3 #( + .FALL_THROUGH(1), + .DEPTH(NoMstPorts), + .dtype (cr_cmd_fifo_t) + ) cr_cmd_fifo_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i (1'b0), + .full_o (cr_cmd_fifo_full), + .empty_o (cr_cmd_fifo_empty), + .usage_o (), + .data_i (cr_cmd_fifo_in), + .push_i (cr_cmd_fifo_push), + .data_o (cr_cmd_fifo_out), + .pop_i (cr_cmd_fifo_pop) + ); + + assign ar_fifo_in = ccu_req_holder_q.ar; + assign ccu_req_holder_o.ar = ar_fifo_out; + + fifo_v3 #( + .FALL_THROUGH(1), + .DEPTH(NoMstPorts), + .dtype (slv_ar_chan_t) + ) ar_fifo_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i (1'b0), + .full_o (ar_fifo_full), + .empty_o (ar_fifo_empty), + .usage_o (), + .data_i (ar_fifo_in), + .push_i (ar_fifo_push), + .data_o (ar_fifo_out), + .pop_i (ar_fifo_pop) + ); + + assign aw_fifo_in = ccu_req_holder_q.aw; + assign ccu_req_holder_o.aw = aw_fifo_out; + + fifo_v3 #( + .FALL_THROUGH(1), + .DEPTH(NoMstPorts), + .dtype (slv_aw_chan_t) + ) aw_fifo_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i (1'b0), + .full_o (aw_fifo_full), + .empty_o (aw_fifo_empty), + .usage_o (), + .data_i (aw_fifo_in), + .push_i (aw_fifo_push), + .data_o (aw_fifo_out), + .pop_i (aw_fifo_pop) + ); + endmodule \ No newline at end of file diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index 5767b15..cb91bf1 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -48,7 +48,7 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; ); localparam CD_FIFO_DEPTH = 2; -localparam AXI_FIFO_DEPTH = 4; +localparam AXI_FIFO_DEPTH = 0; // Passthrough localparam W_FIFO_DEPTH = 2; mst_req_t ccu_req_out; @@ -256,15 +256,10 @@ always_ff @(posedge clk_i or negedge rst_ni) begin end end -logic w_fifo_data_in_temp, 
w_fifo_data_out_temp; - -assign w_fifo_data_in_temp = logic'(w_fifo_data_in); -assign w_fifo_data_out = w_state_t'(w_fifo_data_out_temp); - fifo_v3 #( .FALL_THROUGH(0), - .DATA_WIDTH($bits(w_state_t)), - .DEPTH(W_FIFO_DEPTH) + .DEPTH(W_FIFO_DEPTH), + .dtype(w_state_t) ) w_fifo_i ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -273,9 +268,9 @@ fifo_v3 #( .full_o (w_fifo_full), .empty_o (w_fifo_empty), .usage_o (), - .data_i (w_fifo_data_in_temp), + .data_i (w_fifo_data_in), .push_i (w_fifo_push), - .data_o (w_fifo_data_out_temp), + .data_o (w_fifo_data_out), .pop_i (w_fifo_pop) ); From e74eb7ba2f16e3245c67d86ccdeb2fa1ef818366 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 29 Apr 2024 15:24:43 +0200 Subject: [PATCH 023/109] Rework the decoder block --- src/ccu_ctrl.sv | 28 ++-- src/ccu_ctrl_decoder.sv | 335 +++++++++++++++++++++------------------- src/ccu_ctrl_pkg.sv | 2 + 3 files changed, 193 insertions(+), 172 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 27420c6..faa85e4 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -129,6 +129,11 @@ snoop_req_t [NoMstPorts-1:0] dec_snoop_req; logic dec_lookup_req; logic [AxiAddrWidth-1:0] dec_lookup_addr; +slv_aw_chan_t b_queue_aw; +slv_ar_chan_t r_queue_ar; + +logic b_queue_push, r_queue_push; + logic dec_cd_fifo_stall; ccu_ctrl_decoder #( @@ -176,10 +181,15 @@ ccu_ctrl_decoder #( .lookup_req_o (dec_lookup_req), .lookup_addr_o (dec_lookup_addr), .cd_fifo_stall_i (dec_cd_fifo_stall), + .b_queue_full_i (~b_inp_gnt), .r_queue_full_i (~r_inp_gnt), .b_collision_i (b_exists), - .r_collision_i (r_exists) + .r_collision_i (r_exists), + .b_queue_push_o (b_queue_push), + .r_queue_push_o (r_queue_push), + .b_queue_aw_o (b_queue_aw), + .r_queue_ar_o (r_queue_ar) ); ccu_ctrl_snoop_unit #( @@ -325,19 +335,15 @@ end // Collision Check // ///////////////////// -localparam logic [AxiAddrWidth-1:0] EXISTS_MASK = CollisionOnSetOnly ? 
- {DCacheIndexWidth{1'b1}} << DCacheByteOffset : - ~{DCacheByteOffset{1'b1}}; - logic [AxiAddrWidth-1:0] b_inp_aligned_addr; logic [AxiAddrWidth-1:0] b_exists_aligned_addr; logic [AxiAddrWidth-1:0] r_inp_aligned_addr; logic [AxiAddrWidth-1:0] r_exists_aligned_addr; -assign b_inp_aligned_addr = axi_pkg::aligned_addr(ccu_req_i.aw.addr,ccu_req_i.aw.size); +assign b_inp_aligned_addr = axi_pkg::aligned_addr(b_queue_aw.addr,b_queue_aw.size); assign b_exists_aligned_addr = dec_lookup_addr; -assign r_inp_aligned_addr = axi_pkg::aligned_addr(ccu_req_i.ar.addr,ccu_req_i.ar.size); +assign r_inp_aligned_addr = axi_pkg::aligned_addr(r_queue_ar.addr,r_queue_ar.size); assign r_exists_aligned_addr = dec_lookup_addr; // Exists @@ -365,13 +371,13 @@ assign r_oup_req = ccu_resp_o.r_valid && ccu_req_i.r_ready && ccu_resp_o.r.last; // _gnt is not used as it is combinationally set when req = 1 // Inp -assign b_inp_id = ccu_req_i.aw.id; +assign b_inp_id = b_queue_aw.id; assign b_inp_data = b_inp_aligned_addr[DCacheByteOffset+:IdQueueDataWidth]; -assign b_inp_req = ccu_req_i.aw_valid && ccu_resp_o.aw_ready; +assign b_inp_req = b_queue_push; -assign r_inp_id = ccu_req_i.ar.id; +assign r_inp_id = r_queue_ar.id; assign r_inp_data = r_inp_aligned_addr[DCacheByteOffset+:IdQueueDataWidth]; -assign r_inp_req = ccu_req_i.ar_valid && ccu_resp_o.ar_ready; +assign r_inp_req = r_queue_push; id_queue #( .ID_WIDTH (SlvAxiIDWidth+1), diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index 6f86ae8..d2d4df2 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -48,7 +48,11 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; output logic lookup_req_o, output logic [AxiAddrWidth-1:0] lookup_addr_o, + output logic b_queue_push_o, + output slv_aw_chan_t b_queue_aw_o, input logic b_queue_full_i, + output logic r_queue_push_o, + output slv_ar_chan_t r_queue_ar_o, input logic r_queue_full_i, input logic b_collision_i, input logic r_collision_i, @@ -56,8 +60,6 @@ module 
ccu_ctrl_decoder import ccu_ctrl_pkg::*; input logic cd_fifo_stall_i ); - typedef enum logic [1:0] { INVALID_W, INVALID_R, RESP_R } cr_cmd_fifo_t; - logic [NoMstPorts-1:0] ac_initiator; logic [NoMstPorts-1:0] ac_handshake_q, ac_handshake; @@ -65,6 +67,10 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; logic [NoMstPorts-1:0] cr_aw_mask, cr_ar_mask; logic [NoMstPorts-1:0] cr_handshake_q, cr_handshake; + typedef enum logic [1:0] { INVALID_W, INVALID_R, RESP_R } cr_cmd_fifo_t; + + logic stall; + // AW FIFO logic aw_fifo_empty, aw_fifo_full; logic aw_fifo_pop, aw_fifo_push; @@ -80,36 +86,126 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; logic cr_cmd_fifo_pop, cr_cmd_fifo_push; cr_cmd_fifo_t cr_cmd_fifo_in, cr_cmd_fifo_out; - logic collision; + enum { + AC_IDLE, + AC_BUSY + } ac_state_d, ac_state_q; - assign collision = b_collision_i || r_collision_i; + logic decode_r, decode_w; - enum { - IDLE, - DECODE_R, - DECODE_W, - SEND_READ, - SEND_INVALID_R, - SEND_INVALID_W - } state_d, state_q; + // Hold incoming ACE request - typedef struct packed { - logic waiting_w; - logic waiting_r; - } prio_t; + slv_aw_chan_t aw_holder; + logic aw_holder_valid, aw_holder_ready; + slv_ar_chan_t ar_holder; + logic ar_holder_valid, ar_holder_ready; + snoop_ac_t aw_ac, ar_ac; + logic [NoMstPorts-1:0] aw_initiator, ar_initiator; - prio_t prio_d, prio_q; + assign b_queue_push_o = aw_holder_ready && aw_holder_valid; + assign r_queue_push_o = ar_holder_ready && ar_holder_valid; - logic prio_r, prio_w; + assign b_queue_aw_o = aw_holder; + assign r_queue_ar_o = ar_holder; - assign prio_r = !ccu_req_i.aw_valid || prio_q.waiting_r || !prio_q.waiting_w; - assign prio_w = !ccu_req_i.ar_valid || prio_q.waiting_w; + assign aw_initiator = 1 << aw_holder.id[SlvAxiIDWidth+:MstIdxBits]; + assign ar_initiator = 1 << ar_holder.id[SlvAxiIDWidth+:MstIdxBits]; - logic decode_r, decode_w; logic send_invalid_r; + logic collision; + + assign send_invalid_r = ar_holder.snoop == snoop_pkg::CLEAN_UNIQUE || 
ar_holder.lock; + assign collision = b_collision_i || r_collision_i; + + always_comb begin + aw_ac = '0; + aw_ac.addr = aw_holder.addr; + aw_ac.prot = aw_holder.prot; + aw_ac.snoop = snoop_pkg::CLEAN_INVALID; + + ar_ac = '0; + ar_ac.addr = ar_holder.addr; + ar_ac.prot = ar_holder.prot; + ar_ac.snoop = send_invalid_r ? snoop_pkg::CLEAN_INVALID : ar_holder.snoop; + end + + spill_register #( + .T (slv_aw_chan_t), + .Bypass (1'b0) + ) aw_spill_register ( + .clk_i, + .rst_ni, + .valid_i (ccu_req_i.aw_valid), + .ready_o (slv_aw_ready_o), + .data_i (ccu_req_i.aw), + .valid_o (aw_holder_valid), + .ready_i (aw_holder_ready), + .data_o (aw_holder) + ); + + spill_register #( + .T (slv_ar_chan_t), + .Bypass (1'b0) + ) ar_spill_register ( + .clk_i, + .rst_ni, + .valid_i (ccu_req_i.ar_valid), + .ready_o (slv_ar_ready_o), + .data_i (ccu_req_i.ar), + .valid_o (ar_holder_valid), + .ready_i (ar_holder_ready), + .data_o (ar_holder) + ); + + logic [1:0] arb_req_in, arb_gnt_in; + logic arb_req_out, arb_gnt_out; + snoop_ac_t arb_ac_out; + logic arb_idx_out; + + assign arb_req_in = {aw_holder_valid, ar_holder_valid}; + assign {aw_holder_ready, ar_holder_ready} = arb_gnt_in; + + rr_arb_tree #( + .NumIn ( 2 ), + .DataType ( snoop_ac_t ), + .AxiVldRdy( 1'b1 ), + .LockIn ( 1'b1 ), + .ExtPrio ( 1'b0 ) + ) arbiter_i ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i( 1'b0 ), + .rr_i ( rr ), + .req_i ( arb_req_in ), + .gnt_o ( arb_gnt_in ), + .data_i ( {aw_ac, ar_ac} ), + .req_o ( arb_req_out ), + .gnt_i ( arb_gnt_out ), + .data_o ( arb_ac_out ), + .idx_o ( arb_idx_out ) + ); + + assign stall = |{ + // Collision on address + collision, + // CR CMD FIFO full + cr_cmd_fifo_full, + // CD CMD FIFO full + cd_fifo_stall_i, + // AR requests, ID queue or FIFO full + arb_idx_out == 0 && (r_queue_full_i || ar_fifo_full), + // AW requests, ID queue or FIFO full + arb_idx_out == 1 && (b_queue_full_i || aw_fifo_full), + // AC is busy + ac_state_q == AC_BUSY + }; + assign arb_gnt_out = !stall; + assign
lookup_req_o = arb_req_out; + assign lookup_addr_o = arb_idx_out == 1 ? + axi_pkg::aligned_addr(aw_holder.addr,aw_holder.size): + axi_pkg::aligned_addr(ar_holder.addr,ar_holder.size); - assign send_invalid_r = ccu_req_holder_q.ar.snoop == snoop_pkg::CLEAN_UNIQUE || ccu_req_holder_q.ar.lock; for (genvar i = 0; i < NoMstPorts; i = i + 1) begin assign ac_handshake[i] = m2s_resp_i[i].ac_ready & s2m_req_o[i].ac_valid; @@ -118,34 +214,18 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; logic cr_done; - snoop_ac_t [NoMstPorts-1:0] ac; - logic [NoMstPorts-1:0] ac_valid; - logic [NoMstPorts-1:0] cr_ready; - - // Hold incoming ACE request - slv_req_t ccu_req_holder_q; - - always_ff @(posedge clk_i , negedge rst_ni) begin - if(!rst_ni) begin - ccu_req_holder_q <= '0; - end else if(decode_r) begin - ccu_req_holder_q.ar <= ccu_req_i.ar; - ccu_req_holder_q.ar_valid <= ccu_req_i.ar_valid; - ccu_req_holder_q.r_ready <= ccu_req_i.r_ready; - end else if(decode_w) begin - ccu_req_holder_q.aw <= ccu_req_i.aw; - ccu_req_holder_q.aw_valid <= ccu_req_i.aw_valid; - end - end + snoop_ac_t [NoMstPorts-1:0] ac_out; + logic [NoMstPorts-1:0] ac_out_valid; + logic [NoMstPorts-1:0] cr_out_ready; // Hold snoop AC handshakes for (genvar i = 0; i < NoMstPorts; i = i + 1) begin always_ff @ (posedge clk_i, negedge rst_ni) begin if(!rst_ni) begin ac_handshake_q[i] <= '0; - end else if(state_q inside {DECODE_R, DECODE_W}) begin + end else if(decode_r || decode_w) begin ac_handshake_q[i] <= ac_initiator[i]; - end else if(state_q inside {SEND_READ, SEND_INVALID_R, SEND_INVALID_W}) begin + end else if(ac_state_q == AC_BUSY) begin if (ac_handshake[i]) ac_handshake_q[i] <= 1'b1; end else begin @@ -209,16 +289,18 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; assign first_responder_o = first_responder_q; + snoop_ac_t ac_q, ac_d; + // ---------------------- // Current State Block // ---------------------- - always_ff @(posedge clk_i, negedge rst_ni) begin : ccu_present_state + always_ff @(posedge clk_i, 
negedge rst_ni) begin if(!rst_ni) begin - state_q <= IDLE; - prio_q <= '0; + ac_state_q <= AC_IDLE; + ac_q <= '0; end else begin - state_q <= state_d; - prio_q <= prio_d; + ac_state_q <= ac_state_d; + ac_q <= ac_d; end end @@ -228,123 +310,52 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; always_comb begin - ac = '0; - ac_valid = '0; + ac_d = ac_q; + ac_out_valid = '0; // Next state - state_d = state_q; - prio_d = prio_q; - - slv_ar_ready_o = '0; - slv_aw_ready_o = '0; + ac_state_d = ac_state_q; // Ctrl flags decode_r = 1'b0; decode_w = 1'b0; - lookup_req_o = 1'b0; - lookup_addr_o = axi_pkg::aligned_addr(ccu_req_holder_q.ar.addr,ccu_req_holder_q.ar.size); - cr_cmd_fifo_in = RESP_R; aw_fifo_push = 1'b0; ar_fifo_push = 1'b0; ac_initiator = '0; - case (state_q) - IDLE: begin - - prio_d = '0; - // wait for incoming valid request from master - if(ccu_req_i.ar_valid & prio_r) begin - decode_r = 1'b1; - state_d = DECODE_R; - prio_d.waiting_w = ccu_req_i.aw_valid; - end else if(ccu_req_i.aw_valid & prio_w) begin - decode_w = 1'b1; - state_d = DECODE_W; - prio_d.waiting_r = ccu_req_i.ar_valid; - end - end - - DECODE_W: begin - // AC initiator - ac_initiator = '0; - ac_initiator[ccu_req_i.aw.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; - // Collision lookup - lookup_req_o = 1'b1; - lookup_addr_o = axi_pkg::aligned_addr(ccu_req_holder_q.aw.addr,ccu_req_holder_q.aw.size); - // Stall or accept request - if (!collision && !b_queue_full_i && !cd_fifo_stall_i) begin - state_d = SEND_INVALID_W; - slv_aw_ready_o = 1'b1; - end - end - - DECODE_R: begin - // AC initiator - ac_initiator = '0; - ac_initiator[ccu_req_i.ar.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; - // Collision lookup - lookup_req_o = 1'b1; - lookup_addr_o = axi_pkg::aligned_addr(ccu_req_holder_q.ar.addr,ccu_req_holder_q.ar.size); - // Stall or accept request - if (!collision && !r_queue_full_i && !cd_fifo_stall_i) begin - state_d = send_invalid_r ? 
SEND_INVALID_R : SEND_READ; - slv_ar_ready_o = 1'b1; - end - end - - SEND_READ: begin - cr_cmd_fifo_in = RESP_R; - // wait for all snoop masters to perform an handshake - if (ac_handshake_q == '1 && !cr_cmd_fifo_full && !ar_fifo_full) begin - state_d = IDLE; - ar_fifo_push = 1'b1; - end - // send request to snooping masters - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - ac[n].addr = ccu_req_holder_q.ar.addr; - ac[n].prot = ccu_req_holder_q.ar.prot; - ac[n].snoop = ccu_req_holder_q.ar.snoop; - ac_valid[n] = !ac_handshake_q[n]; - end - end - - SEND_INVALID_R: begin - cr_cmd_fifo_in = INVALID_R; - // wait for all snoop masters to perform an handshake - if (ac_handshake_q == '1 && !cr_cmd_fifo_full && !ar_fifo_full) begin - state_d = IDLE; - ar_fifo_push = 1'b1; + case (ac_state_q) + AC_IDLE: begin + if (arb_req_out && !stall) begin + ac_d = arb_ac_out; + ac_state_d = AC_BUSY; + if (arb_idx_out == 1) begin + decode_w = 1'b1; + aw_fifo_push = 1'b1; + cr_cmd_fifo_in = INVALID_W; + ac_initiator = aw_initiator; + end else if (arb_idx_out == 0) begin + decode_r = 1'b1; + ar_fifo_push = 1'b1; + cr_cmd_fifo_in = send_invalid_r ? 
INVALID_R : RESP_R; + ac_initiator = ar_initiator; + end end - // send request to snooping masters - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - ac[n].addr = ccu_req_holder_q.ar.addr; - ac[n].prot = ccu_req_holder_q.ar.prot; - ac[n].snoop = snoop_pkg::CLEAN_INVALID; - ac_valid[n] = !ac_handshake_q[n]; - end end - SEND_INVALID_W: begin - cr_cmd_fifo_in = INVALID_W; - // wait for all snoop masters to perform an handshake - if (ac_handshake_q == '1 && !cr_cmd_fifo_full && !aw_fifo_full) begin - state_d = IDLE; - aw_fifo_push = 1'b1; - end - // send request to snooping masters - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - ac[n].addr = ccu_req_holder_q.aw.addr; - ac[n].prot = ccu_req_holder_q.aw.prot; - ac[n].snoop = snoop_pkg::CLEAN_INVALID; - ac_valid[n] = !ac_handshake_q[n]; + AC_BUSY: begin + if ((ac_handshake_q | ac_handshake) == '1) begin + ac_state_d = AC_IDLE; end + ac_out_valid = ~ac_handshake_q; end endcase end + assign ac_out = {NoMstPorts{ac_q}}; + assign cr_aw_initiator = 1 << aw_fifo_out.id[SlvAxiIDWidth+:MstIdxBits]; assign cr_ar_initiator = 1 << ar_fifo_out.id[SlvAxiIDWidth+:MstIdxBits]; assign cr_aw_mask = cr_aw_initiator | cr_handshake_q; @@ -362,7 +373,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; aw_fifo_pop = '0; ar_fifo_pop = '0; - cr_ready = '0; + cr_out_ready = '0; if (!cr_cmd_fifo_empty) begin case (cr_cmd_fifo_out) @@ -387,10 +398,12 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; end for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - cr_ready[n] = !cr_ar_mask[n]; + cr_out_ready[n] = !cr_ar_mask[n]; end INVALID_R: begin + // TODO: sending the ack R transaction could be moved from + // the snoop unit directly here // wait for all CR handshakes if (cr_ar_mask == '1) begin @@ -401,17 +414,17 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; if(|(data_available_q & ~response_error_q)) begin mu_op_o = SEND_AXI_REQ_WRITE_BACK_R; - mu_valid_o = 1'b1; + mu_valid_o = (ar_fifo_out.lock || su_ready_i); end 
else if (ar_fifo_out.lock) begin mu_op_o = SEND_AXI_REQ_R; - mu_valid_o = 1'b1; + mu_valid_o = (ar_fifo_out.lock || su_ready_i); end end su_op_o = SEND_INVALID_ACK_R; for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - cr_ready[n] = !cr_ar_mask[n]; + cr_out_ready[n] = !cr_ar_mask[n]; end INVALID_W: begin @@ -432,7 +445,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; end for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - cr_ready[n] = !cr_aw_mask[n]; + cr_out_ready[n] = !cr_aw_mask[n]; end endcase end @@ -441,9 +454,9 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; always_comb begin s2m_req_o = '0; for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - s2m_req_o[n].ac = ac[n]; - s2m_req_o[n].ac_valid = ac_valid[n]; - s2m_req_o[n].cr_ready = cr_ready[n]; + s2m_req_o[n].ac = ac_out[n]; + s2m_req_o[n].ac_valid = ac_out_valid[n]; + s2m_req_o[n].cr_ready = cr_out_ready[n]; end end @@ -451,8 +464,8 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; assign cr_cmd_fifo_pop = aw_fifo_pop || ar_fifo_pop; fifo_v3 #( - .FALL_THROUGH(1), - .DEPTH(NoMstPorts), + .FALL_THROUGH(0), + .DEPTH(4), .dtype (cr_cmd_fifo_t) ) cr_cmd_fifo_i ( .clk_i (clk_i), @@ -468,12 +481,12 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; .pop_i (cr_cmd_fifo_pop) ); - assign ar_fifo_in = ccu_req_holder_q.ar; + assign ar_fifo_in = ar_holder; assign ccu_req_holder_o.ar = ar_fifo_out; fifo_v3 #( - .FALL_THROUGH(1), - .DEPTH(NoMstPorts), + .FALL_THROUGH(0), + .DEPTH(4), .dtype (slv_ar_chan_t) ) ar_fifo_i ( .clk_i (clk_i), @@ -489,12 +502,12 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; .pop_i (ar_fifo_pop) ); - assign aw_fifo_in = ccu_req_holder_q.aw; + assign aw_fifo_in = aw_holder; assign ccu_req_holder_o.aw = aw_fifo_out; fifo_v3 #( - .FALL_THROUGH(1), - .DEPTH(NoMstPorts), + .FALL_THROUGH(0), + .DEPTH(4), .dtype (slv_aw_chan_t) ) aw_fifo_i ( .clk_i (clk_i), diff --git a/src/ccu_ctrl_pkg.sv b/src/ccu_ctrl_pkg.sv index a9c529e..6e0f643 100644 --- a/src/ccu_ctrl_pkg.sv +++ 
b/src/ccu_ctrl_pkg.sv @@ -15,4 +15,6 @@ package ccu_ctrl_pkg; SEND_INVALID_ACK_R } su_op_e; + typedef enum logic { MEMORY_UNIT, SNOOP_UNIT } cd_user_t; + endpackage \ No newline at end of file From 6b0e273007ad773e69af267e59b580ac5869b452 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 3 May 2024 10:52:46 +0200 Subject: [PATCH 024/109] Correctly propagate AxiAddrWidth --- src/ace_ccu_top.sv | 1 + src/ccu_ctrl.sv | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ace_ccu_top.sv b/src/ace_ccu_top.sv index 9301061..ff47c48 100644 --- a/src/ace_ccu_top.sv +++ b/src/ace_ccu_top.sv @@ -228,6 +228,7 @@ ccu_ctrl #( .DcacheLineWidth ( Cfg.DcacheLineWidth ), .DCacheIndexWidth( Cfg.DCacheIndexWidth ), .AxiDataWidth ( Cfg.AxiDataWidth ), + .AxiAddrWidth ( Cfg.AxiAddrWidth ), .NoMstPorts ( Cfg.NoSlvPorts ), .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts ), // ID width of the slave ports .mst_aw_chan_t ( mst_stg_aw_chan_t ), // AW Channel Type, master port diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index faa85e4..463ef83 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -10,6 +10,7 @@ module ccu_ctrl import ccu_ctrl_pkg::*; import axi_pkg::*; parameter int unsigned DcacheLineWidth = 0, parameter int unsigned DCacheIndexWidth = 0, parameter int unsigned AxiDataWidth = 0, + parameter int unsigned AxiAddrWidth = 0, parameter int unsigned NoMstPorts = 4, parameter int unsigned SlvAxiIDWidth = 0, parameter bit CollisionOnSetOnly = 0, @@ -46,7 +47,6 @@ module ccu_ctrl import ccu_ctrl_pkg::*; import axi_pkg::*; input snoop_resp_t [NoMstPorts-1:0] m2s_resp_i ); -localparam int unsigned AxiAddrWidth = 64; localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth; localparam int unsigned DCacheByteOffset = $clog2(DcacheLineWidth/8); localparam int unsigned MstIdxBits = $clog2(NoMstPorts); From 0e7b3a0526fdc0d5852a5b291d12347d06d332a7 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 3 May 2024 15:21:07 +0200 Subject: [PATCH 
025/109] Fix w_last handling in memory unit --- src/ccu_ctrl_memory_unit.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index cb91bf1..5df7b1d 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -296,7 +296,7 @@ always_comb begin end W_FROM_FIFO: begin // Connect the FIFO as long as the transmission is ongoing - w_last_d = ccu_resp_in.w_ready && !cd_fifo_empty; + w_last_d = (ccu_resp_in.w_ready && !cd_fifo_empty) || w_last_q; ccu_req_out.w_valid = !cd_fifo_empty; ccu_req_out.w.strb = '1; ccu_req_out.w.data = cd_fifo_data_out; From 9a4b617824b5f74117318554934992964d0a6bdc Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Tue, 7 May 2024 17:37:23 +0200 Subject: [PATCH 026/109] Rework the design to reduce latency * Use req/gnt protocol * Serve request in the same cycle if possible --- src/ccu_ctrl.sv | 41 ++++--- src/ccu_ctrl_decoder.sv | 153 +++++++++++++++----------- src/ccu_ctrl_memory_unit.sv | 207 ++++++++++++++++++------------------ src/ccu_ctrl_snoop_unit.sv | 180 ++++++++++++++----------------- 4 files changed, 298 insertions(+), 283 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 463ef83..346c2ad 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -100,9 +100,9 @@ slv_req_t mu_ccu_req; su_op_e su_op; mu_op_e mu_op; -logic su_valid, mu_valid; +logic su_req, mu_req; -logic su_ready, mu_ready; +logic su_gnt, mu_gnt; slv_req_t dec_ccu_req_holder; @@ -167,10 +167,10 @@ ccu_ctrl_decoder #( .slv_ar_ready_o (ccu_ar_ready), .ccu_req_holder_o (dec_ccu_req_holder), - .su_ready_i (su_ready), - .mu_ready_i (mu_ready), - .su_valid_o (su_valid), - .mu_valid_o (mu_valid), + .su_gnt_i (su_gnt), + .mu_gnt_i (mu_gnt), + .su_req_o (su_req), + .mu_req_o (mu_req), .su_op_o (su_op), .mu_op_o (mu_op), .shared_o (dec_shared), @@ -223,8 +223,8 @@ ccu_ctrl_snoop_unit #( .ccu_req_holder_i (dec_ccu_req_holder), - .su_ready_o (su_ready), - .su_valid_i 
(su_valid), + .su_gnt_o (su_gnt), + .su_req_i (su_req), .su_op_i (su_op), .shared_i (dec_shared), @@ -269,8 +269,8 @@ ccu_ctrl_memory_unit #( .cd_fifo_full_o (mu_cd_fifo_full), .ccu_req_holder_i (dec_ccu_req_holder), - .mu_ready_o (mu_ready), - .mu_valid_i (mu_valid), + .mu_gnt_o (mu_gnt), + .mu_req_i (mu_req), .mu_op_i (mu_op), .first_responder_i (dec_first_responder) ); @@ -454,14 +454,27 @@ assign su_wb_op = su_op == READ_SNP_DATA; assign dec_cd_fifo_stall = cd_user_full; +logic cd_user_pushed_d, cd_user_pushed_q; + +always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + cd_user_pushed_q <= '0; + end else begin + cd_user_pushed_q <= cd_user_pushed_d; + end +end + always_comb begin + cd_user_pushed_d = cd_user_pushed_q; cd_user_push = 1'b0; cd_user_in = MEMORY_UNIT; - if (mu_ready && mu_valid && mu_wb_op) begin - cd_user_push = 1'b1; + if (mu_req && mu_wb_op) begin + cd_user_pushed_d = !mu_gnt; + cd_user_push = !cd_user_pushed_q; cd_user_in = MEMORY_UNIT; - end else if (su_ready && su_valid && su_wb_op) begin - cd_user_push = 1'b1; + end else if (su_req && su_wb_op) begin + cd_user_pushed_d = !su_gnt; + cd_user_push = !cd_user_pushed_q; cd_user_in = SNOOP_UNIT; end end diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index d2d4df2..f74ee71 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -34,10 +34,10 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; output slv_req_t ccu_req_holder_o, - output logic su_valid_o, - input logic su_ready_i, - output logic mu_valid_o, - input logic mu_ready_i, + output logic su_req_o, + input logic su_gnt_i, + output logic mu_req_o, + input logic mu_gnt_i, output mu_op_e mu_op_o, output su_op_e su_op_o, @@ -64,8 +64,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; logic [NoMstPorts-1:0] ac_handshake_q, ac_handshake; logic [NoMstPorts-1:0] cr_aw_initiator, cr_ar_initiator; - logic [NoMstPorts-1:0] cr_aw_mask, cr_ar_mask; - logic [NoMstPorts-1:0] cr_handshake_q, cr_handshake; + 
logic [NoMstPorts-1:0] cr_handshake_q, cr_handshake_d, cr_handshake; typedef enum logic [1:0] { INVALID_W, INVALID_R, RESP_R } cr_cmd_fifo_t; @@ -132,7 +131,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; spill_register #( .T (slv_aw_chan_t), - .Bypass (1'b0) + .Bypass (1'b1) ) aw_spill_register ( .clk_i, .rst_ni, @@ -146,7 +145,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; spill_register #( .T (slv_ar_chan_t), - .Bypass (1'b0) + .Bypass (1'b1) ) ar_spill_register ( .clk_i, .rst_ni, @@ -176,7 +175,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; .clk_i ( clk_i ), .rst_ni ( rst_ni ), .flush_i( 1'b0 ), - .rr_i ( rr ), + .rr_i ( '0 ), .req_i ( arb_req_in ), .gnt_o ( arb_gnt_in ), .data_i ( {aw_ac, ar_ac} ), @@ -236,6 +235,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; // Hold snoop CR handshakes logic [NoMstPorts-1:0] data_available_q, response_error_q, shared_q, dirty_q; + logic [NoMstPorts-1:0] data_available_d, response_error_d, shared_d, dirty_d; always_ff @ (posedge clk_i, negedge rst_ni) begin if(!rst_ni) begin cr_handshake_q <= '0; @@ -250,24 +250,28 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; dirty_q <= '0; response_error_q <= '0; end else begin - for (int i = 0; i < NoMstPorts; i = i + 1) begin - if(cr_handshake[i]) begin - cr_handshake_q[i] <= 1'b1; - data_available_q[i] <= m2s_resp_i[i].cr_resp.dataTransfer; - shared_q[i] <= m2s_resp_i[i].cr_resp.isShared; - dirty_q[i] <= m2s_resp_i[i].cr_resp.passDirty; - response_error_q[i] <= m2s_resp_i[i].cr_resp.error; - end - end + cr_handshake_q <= cr_handshake_d; + data_available_q <= data_available_d; + shared_q <= shared_d; + dirty_q <= dirty_d; + response_error_q <= response_error_d; end end - assign dirty_o = |dirty_q; - assign shared_o = |shared_q; - assign data_available_o = data_available_q; + for (genvar i = 0; i < NoMstPorts; i = i + 1) begin + assign cr_handshake_d[i] = cr_handshake[i] ? 1'b1 : cr_handshake_q[i]; + assign data_available_d[i] = cr_handshake[i] ? 
m2s_resp_i[i].cr_resp.dataTransfer : data_available_q[i]; + assign shared_d[i] = cr_handshake[i] ? m2s_resp_i[i].cr_resp.isShared : shared_q[i]; + assign dirty_d[i] = cr_handshake[i] ? m2s_resp_i[i].cr_resp.passDirty : dirty_q[i]; + assign response_error_d[i] = cr_handshake[i] ? m2s_resp_i[i].cr_resp.error : response_error_q[i]; + end + + assign dirty_o = |dirty_d; + assign shared_o = |shared_d; + assign data_available_o = data_available_d; - logic [MstIdxBits-1:0] first_responder_q; - logic snoop_resp_found_q; + logic [MstIdxBits-1:0] first_responder_q, first_responder_d; + logic snoop_resp_found_q, snoop_resp_found_d; always_ff @ (posedge clk_i, negedge rst_ni) begin if(!rst_ni) begin @@ -277,30 +281,44 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; first_responder_q <= '0; snoop_resp_found_q <= 1'b0; end else if (!snoop_resp_found_q) begin - for (int i = 0; i < NoMstPorts; i = i + 1) begin + first_responder_q <= first_responder_d; + snoop_resp_found_q <= snoop_resp_found_d; + end + end + + always_comb begin + first_responder_d = first_responder_q; + snoop_resp_found_d = snoop_resp_found_q; + for (int i = 0; i < NoMstPorts; i = i + 1) begin if(cr_handshake[i] & m2s_resp_i[i].cr_resp.dataTransfer & !m2s_resp_i[i].cr_resp.error) begin - first_responder_q <= i[MstIdxBits-1:0]; - snoop_resp_found_q <= 1'b1; - break; + first_responder_d = i[MstIdxBits-1:0]; + snoop_resp_found_d = 1'b1; + break; end - end end end - assign first_responder_o = first_responder_q; + assign first_responder_o = first_responder_d; snoop_ac_t ac_q, ac_d; + logic mu_done_d, mu_done_q; + logic su_done_d, su_done_q; + // ---------------------- // Current State Block // ---------------------- always_ff @(posedge clk_i, negedge rst_ni) begin if(!rst_ni) begin ac_state_q <= AC_IDLE; - ac_q <= '0; + ac_q <= '0; + mu_done_q <= '0; + su_done_q <= '0; end else begin ac_state_q <= ac_state_d; - ac_q <= ac_d; + ac_q <= ac_d; + mu_done_q <= mu_done_d; + su_done_q <= su_done_d; end end @@ -358,15 
+376,14 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; assign cr_aw_initiator = 1 << aw_fifo_out.id[SlvAxiIDWidth+:MstIdxBits]; assign cr_ar_initiator = 1 << ar_fifo_out.id[SlvAxiIDWidth+:MstIdxBits]; - assign cr_aw_mask = cr_aw_initiator | cr_handshake_q; - assign cr_ar_mask = cr_ar_initiator | cr_handshake_q; - - assign cr_done = (mu_valid_o && mu_ready_i) || (su_valid_o && su_ready_i); always_comb begin - su_valid_o = 1'b0; - mu_valid_o = 1'b0; + mu_done_d = mu_done_q; + su_done_d = su_done_q; + + su_req_o = 1'b0; + mu_req_o = 1'b0; su_op_o = READ_SNP_DATA; mu_op_o = SEND_AXI_REQ_R; @@ -375,77 +392,89 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; cr_out_ready = '0; + cr_done = 1'b0; + if (!cr_cmd_fifo_empty) begin case (cr_cmd_fifo_out) - RESP_R: begin // wait for all CR handshakes - if (cr_ar_mask == '1) begin + if (cr_handshake_d == ~cr_ar_initiator) begin - if(|(data_available_q & ~response_error_q)) begin + if(|(data_available_d & ~response_error_d)) begin su_op_o = READ_SNP_DATA; - su_valid_o = 1'b1; - if (su_ready_i) begin + su_req_o = 1'b1; + if (su_gnt_i) begin ar_fifo_pop = 1'b1; + cr_done = 1'b1; end end else begin mu_op_o = SEND_AXI_REQ_R; - mu_valid_o = 1'b1; - if (mu_ready_i) begin + mu_req_o = 1'b1; + if (mu_gnt_i) begin ar_fifo_pop = 1'b1; + cr_done = 1'b1; end end end - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - cr_out_ready[n] = !cr_ar_mask[n]; + cr_out_ready = ~(cr_handshake_q | cr_ar_initiator); end INVALID_R: begin // TODO: sending the ack R transaction could be moved from // the snoop unit directly here // wait for all CR handshakes - if (cr_ar_mask == '1) begin + if (cr_handshake_d == ~cr_ar_initiator) begin - if (mu_ready_i && (ar_fifo_out.lock || su_ready_i)) begin - ar_fifo_pop = 1'b1; - su_valid_o = !ar_fifo_out.lock; - end + su_req_o = !ar_fifo_out.lock && !su_done_q; + su_done_d = su_gnt_i || su_done_q; - if(|(data_available_q & ~response_error_q)) begin + if(|(data_available_d & ~response_error_d)) begin mu_op_o = 
SEND_AXI_REQ_WRITE_BACK_R; - mu_valid_o = (ar_fifo_out.lock || su_ready_i); + mu_req_o = !mu_done_q; + cr_done = ar_fifo_out.lock ? mu_gnt_i : + &({mu_gnt_i, su_gnt_i} | {mu_done_q, su_done_q}); end else if (ar_fifo_out.lock) begin mu_op_o = SEND_AXI_REQ_R; - mu_valid_o = (ar_fifo_out.lock || su_ready_i); + mu_req_o = !mu_done_q; + cr_done = mu_gnt_i; + end else begin + cr_done = su_gnt_i; + end + + mu_done_d = mu_gnt_i || mu_done_q; + + if (cr_done) begin + ar_fifo_pop = 1'b1; + mu_done_d = 1'b0; + su_done_d = 1'b0; end end su_op_o = SEND_INVALID_ACK_R; - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - cr_out_ready[n] = !cr_ar_mask[n]; + cr_out_ready = ~(cr_handshake_q | cr_ar_initiator); end INVALID_W: begin // wait for all CR handshakes - if (cr_aw_mask == '1) begin + if (cr_handshake_d == ~cr_aw_initiator) begin - mu_valid_o = 1'b1; + mu_req_o = 1'b1; - if (mu_ready_i) begin + if (mu_gnt_i) begin aw_fifo_pop = 1'b1; + cr_done = 1'b1; end - if(|(data_available_q & ~response_error_q)) begin + if(|(data_available_d & ~response_error_d)) begin mu_op_o = SEND_AXI_REQ_WRITE_BACK_W; end else begin mu_op_o = SEND_AXI_REQ_W; end end - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - cr_out_ready[n] = !cr_aw_mask[n]; + cr_out_ready = ~(cr_handshake_q | cr_aw_initiator); end endcase end diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index 5df7b1d..afb2c58 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -41,8 +41,8 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; input slv_req_t ccu_req_holder_i, - output logic mu_ready_o, - input logic mu_valid_i, + output logic mu_gnt_o, + input logic mu_req_i, input mu_op_e mu_op_i, input logic [MstIdxBits-1:0] first_responder_i ); @@ -54,8 +54,8 @@ localparam W_FIFO_DEPTH = 2; mst_req_t ccu_req_out; mst_resp_t ccu_resp_in; -slv_req_t ccu_req_holder_q; -logic [MstIdxBits-1:0] first_responder_q; +slv_req_t ccu_req_holder_q, ccu_req_holder_d; +logic [MstIdxBits-1:0] 
first_responder_q, first_responder_d; logic cd_fifo_pop, cd_fifo_empty; logic [AxiDataWidth-1:0] cd_fifo_data_out; @@ -64,9 +64,9 @@ always_ff @(posedge clk_i , negedge rst_ni) begin if(!rst_ni) begin ccu_req_holder_q <= '0; first_responder_q <= '0; - end else if (mu_ready_o && mu_valid_i) begin - ccu_req_holder_q <= ccu_req_holder_i; - first_responder_q <= first_responder_i; + end else if (mu_gnt_o && mu_req_i) begin + ccu_req_holder_q <= ccu_req_holder_d; + first_responder_q <= first_responder_d; end end @@ -96,10 +96,13 @@ logic w_fifo_full, w_fifo_empty; logic w_fifo_push, w_fifo_pop; w_state_t w_fifo_data_in, w_fifo_data_out; +assign first_responder_d = !ax_busy_q ? first_responder_i : first_responder_q; +assign ccu_req_holder_d = !ax_busy_q ? ccu_req_holder_i : ccu_req_holder_q; +assign mu_gnt_o = !ax_busy_q ? mu_req_i : 1'b0; + always_comb begin - mu_ready_o = 1'b0; ax_busy_d = ax_busy_q; - ax_op_d = ax_op_q; + ax_op_d = ax_busy_q ? ax_op_q : mu_op_i; ar_out = '0; aw_out = '0; @@ -109,114 +112,110 @@ always_comb begin w_fifo_push = 1'b0; w_fifo_data_in = W_PASSTHROUGH; - case (ax_busy_q) - 1'b0: begin - mu_ready_o = 1'b1; - if (mu_valid_i) begin - ax_op_d = mu_op_i; - ax_busy_d = 1'b1; + if (mu_req_i || ax_busy_q) begin + ax_busy_d = 1'b1; + case (ax_op_d) + SEND_AXI_REQ_R: begin + ar_valid_out = 'b1; + ar_out = ccu_req_holder_d.ar; + if (ccu_resp_in.ar_ready) begin + ax_busy_d = 1'b0; + end end - end - 1'b1: begin - case (ax_op_q) - SEND_AXI_REQ_R: begin - ar_valid_out = 'b1; - ar_out = ccu_req_holder_q.ar; - if (ccu_resp_in.ar_ready) begin + SEND_AXI_REQ_WRITE_BACK_R: begin + // send writeback request + aw_valid_out = !w_fifo_full; + aw_out = '0; //default + aw_out.addr = ccu_req_holder_d.ar.addr; + aw_out.addr[3:0] = 4'b0; // writeback is always full cache line + aw_out.size = 2'b11; + aw_out.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction + aw_out.id = {1'b1, first_responder_d, ccu_req_holder_d.ar.id[SlvAxiIDWidth-1:0]}; // It 
should be visible this data originates from the responder, important e.g. for AMO operations + aw_out.len = DcacheLineWords-1; + // WRITEBACK + aw_out.domain = 2'b00; + aw_out.snoop = 3'b011; + + w_fifo_data_in = W_FROM_FIFO; + + if (ccu_resp_in.aw_ready && !w_fifo_full) begin + w_fifo_push = 1'b1; + if (ccu_req_holder_d.ar.lock) begin + // Blocking behavior for AMO operations + // TODO: check if truly needed + ax_op_d = AMO_WAIT_WB_R; + end else begin ax_busy_d = 1'b0; end end - SEND_AXI_REQ_WRITE_BACK_R: begin - // send writeback request - aw_valid_out = !w_fifo_full; - aw_out = '0; //default - aw_out.addr = ccu_req_holder_q.ar.addr; - aw_out.addr[3:0] = 4'b0; // writeback is always full cache line - aw_out.size = 2'b11; - aw_out.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction - aw_out.id = {1'b1, first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. for AMO operations - aw_out.len = DcacheLineWords-1; - // WRITEBACK - aw_out.domain = 2'b00; - aw_out.snoop = 3'b011; - - w_fifo_data_in = W_FROM_FIFO; - - if (ccu_resp_in.aw_ready && !w_fifo_full) begin - w_fifo_push = 1'b1; - if (ccu_req_holder_q.ar.lock) - // Blocking behavior for AMO operations - // TODO: check if truly needed - ax_op_d = AMO_WAIT_WB_R; - else - ax_busy_d = 1'b0; - end - end - SEND_AXI_REQ_W: begin - aw_valid_out = !w_fifo_full; - aw_out = ccu_req_holder_q.aw; - - w_fifo_data_in = W_PASSTHROUGH; - - if (ccu_resp_in.aw_ready && !w_fifo_full) begin - w_fifo_push = 1'b1; - if (ccu_req_holder_q.aw.atop[5]) - // Blocking behavior for AMO operations - // TODO: check if truly needed - ax_op_d = AMO_WAIT_READ; - else - ax_busy_d = 1'b0; - end - end - SEND_AXI_REQ_WRITE_BACK_W: begin - // send writeback request - aw_valid_out = !w_fifo_full; - aw_out = '0; //default - aw_out.addr = ccu_req_holder_q.aw.addr; - aw_out.addr[3:0] = 4'b0; // writeback is always full cache line - aw_out.size = 
2'b11; - aw_out.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction - aw_out.id = {1'b1, first_responder_q, ccu_req_holder_q.aw.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. for AMO operations - aw_out.len = DcacheLineWords-1; - // WRITEBACK - aw_out.domain = 2'b00; - aw_out.snoop = 3'b011; - - w_fifo_data_in = W_FROM_FIFO; - - if (ccu_resp_in.aw_ready && !w_fifo_full) begin - w_fifo_push = 1'b1; - if (ccu_req_holder_q.aw.atop[5]) - ax_op_d = AMO_WAIT_WB_W; - else - ax_op_d = SEND_AXI_REQ_W; - end - end - AMO_WAIT_READ: begin - if(ccu_resp_in.r_valid && ccu_req_i.r_ready && ccu_resp_in.r.last - && ccu_resp_in.r.id == ccu_req_holder_q.aw.id) + end + SEND_AXI_REQ_W: begin + aw_valid_out = !w_fifo_full; + aw_out = ccu_req_holder_d.aw; + + w_fifo_data_in = W_PASSTHROUGH; + + if (ccu_resp_in.aw_ready && !w_fifo_full) begin + w_fifo_push = 1'b1; + if (ccu_req_holder_d.aw.atop[5]) begin + // Blocking behavior for AMO operations + // TODO: check if truly needed + ax_op_d = AMO_WAIT_READ; + end else begin ax_busy_d = 1'b0; + end end - AMO_WAIT_WB_R: begin - if(ccu_resp_in.b_valid && ccu_req_out.b_ready - && ccu_resp_in.b.id == {1'b1, first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}) - ax_op_d = SEND_AXI_REQ_R; - end - AMO_WAIT_WB_W: begin - if(ccu_resp_in.b_valid && ccu_req_out.b_ready && - ccu_resp_in.b.id == {1'b1, first_responder_q, ccu_req_holder_q.aw.id[SlvAxiIDWidth-1:0]}) + end + SEND_AXI_REQ_WRITE_BACK_W: begin + // send writeback request + aw_valid_out = !w_fifo_full; + aw_out = '0; //default + aw_out.addr = ccu_req_holder_d.aw.addr; + aw_out.addr[3:0] = 4'b0; // writeback is always full cache line + aw_out.size = 2'b11; + aw_out.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction + aw_out.id = {1'b1, first_responder_d, ccu_req_holder_d.aw.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. 
for AMO operations + aw_out.len = DcacheLineWords-1; + // WRITEBACK + aw_out.domain = 2'b00; + aw_out.snoop = 3'b011; + + w_fifo_data_in = W_FROM_FIFO; + + if (ccu_resp_in.aw_ready && !w_fifo_full) begin + w_fifo_push = 1'b1; + ax_busy_d = 1'b1; + if (ccu_req_holder_d.aw.atop[5]) + ax_op_d = AMO_WAIT_WB_W; + else ax_op_d = SEND_AXI_REQ_W; end - endcase - end - endcase + end + AMO_WAIT_READ: begin + if(ccu_resp_in.r_valid && ccu_req_i.r_ready && ccu_resp_in.r.last + && ccu_resp_in.r.id == ccu_req_holder_q.aw.id) + ax_busy_d = 1'b0; + end + AMO_WAIT_WB_R: begin + if(ccu_resp_in.b_valid && ccu_req_out.b_ready + && ccu_resp_in.b.id == {1'b1, first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}) + ax_op_d = SEND_AXI_REQ_R; + end + AMO_WAIT_WB_W: begin + if(ccu_resp_in.b_valid && ccu_req_out.b_ready && + ccu_resp_in.b.id == {1'b1, first_responder_q, ccu_req_holder_q.aw.id[SlvAxiIDWidth-1:0]}) + ax_op_d = SEND_AXI_REQ_W; + end + endcase + end end + assign cd_fifo_pop = w_fifo_data_out == W_FROM_FIFO && ccu_resp_in.w_ready && ccu_req_out.w_valid; fifo_v3 #( - .FALL_THROUGH(0), + .FALL_THROUGH(1), .DATA_WIDTH(AxiDataWidth), .DEPTH(CD_FIFO_DEPTH) ) cd_memory_fifo_i ( @@ -257,7 +256,7 @@ always_ff @(posedge clk_i or negedge rst_ni) begin end fifo_v3 #( - .FALL_THROUGH(0), + .FALL_THROUGH(1), .DEPTH(W_FIFO_DEPTH), .dtype(w_state_t) ) w_fifo_i ( diff --git a/src/ccu_ctrl_snoop_unit.sv b/src/ccu_ctrl_snoop_unit.sv index 1d2bfdd..3ca24dd 100644 --- a/src/ccu_ctrl_snoop_unit.sv +++ b/src/ccu_ctrl_snoop_unit.sv @@ -32,8 +32,8 @@ module ccu_ctrl_snoop_unit import ccu_ctrl_pkg::*; output logic cd_fifo_full_o, input slv_req_t ccu_req_holder_i, - output logic su_ready_o, - input logic su_valid_i, + output logic su_gnt_o, + input logic su_req_i, input su_op_e su_op_i, input logic shared_i, input logic dirty_i @@ -41,157 +41,131 @@ module ccu_ctrl_snoop_unit import ccu_ctrl_pkg::*; localparam FIFO_DEPTH = 2; -enum { - IDLE, - SEND_LOWER_HALF, - SEND_UPPER_HALF, - WAIT_R_READY, - 
WAIT_CD_LAST -} state_d, state_q; - logic [AxiDataWidth-1:0] fifo_data_in, fifo_data_out; logic [$clog2(DcacheLineWords)-1:0] fifo_usage; -logic sample_dec_data; +logic su_busy_d, su_busy_q; +logic r_last_d, r_last_q; +su_op_e su_op_d, su_op_q; -slv_req_t ccu_req_holder_q; -logic shared_q; -logic dirty_q; +slv_req_t ccu_req_holder_q, ccu_req_holder_d; +logic shared_q, shared_d; +logic dirty_q, dirty_d; always_ff @(posedge clk_i , negedge rst_ni) begin if(!rst_ni) begin ccu_req_holder_q <= '0; shared_q <= '0; dirty_q <= '0; - end else if(sample_dec_data) begin - ccu_req_holder_q <= ccu_req_holder_i; - shared_q <= shared_i; - dirty_q <= dirty_i; + end else begin + ccu_req_holder_q <= ccu_req_holder_d; + shared_q <= shared_d; + dirty_q <= dirty_d; end end always_ff @(posedge clk_i , negedge rst_ni) begin if(!rst_ni) begin - state_q <= IDLE; + su_busy_q <= '0; + su_op_q <= READ_SNP_DATA; + r_last_q <= '0; end else begin - state_q <= state_d; + su_busy_q <= su_busy_d; + su_op_q <= su_op_d; + r_last_q <= r_last_d; end end logic ar_addr_offset; -assign ar_addr_offset = ccu_req_holder_q.ar.addr[3]; +assign ar_addr_offset = ccu_req_holder_i.ar.addr[3]; logic fifo_full, fifo_empty, fifo_push, fifo_pop; assign cd_fifo_full_o = fifo_full; -always_comb begin - - state_d = state_q; +assign ccu_req_holder_d = su_busy_q ? ccu_req_holder_q : ccu_req_holder_i; +assign shared_d = su_busy_q ? shared_q : shared_i; +assign dirty_d = su_busy_q ? dirty_q : dirty_i; +assign su_op_d = su_busy_q ? 
su_op_q : su_op_i; - su_ready_o = 1'b0; +always_comb begin + su_gnt_o = 1'b0; r_o = '0; r_valid_o = 1'b0; fifo_pop = 1'b0; - sample_dec_data = 1'b0; - - case (state_q) - IDLE: begin - su_ready_o = 1'b1; - if (su_valid_i) begin - if (su_op_i == SEND_INVALID_ACK_R) begin - r_o = '0; - r_o.id = ccu_req_holder_i.ar.id; - r_o.last = 'b1; - r_valid_o = 'b1; - if (!r_ready_i) begin - state_d = WAIT_R_READY; - sample_dec_data = 1'b1; + su_busy_d = su_busy_q; + r_last_d = r_last_q; + + if (su_req_i || su_busy_q) begin + su_gnt_o = !su_busy_q; + su_busy_d = 1'b1; + case (su_op_d) + READ_SNP_DATA: begin + // Prepare request + r_o.data = fifo_data_out; + r_o.id = ccu_req_holder_d.ar.id; + r_o.resp[3] = shared_d; // update if shared + r_o.resp[2] = dirty_d; // update if any line dirty + r_o.last = r_last_q; // No further transactions + + if (r_last_q) begin + r_valid_o = !fifo_empty; + if (r_ready_i && !fifo_empty) begin + fifo_pop = 1'b1; + su_busy_d = 1'b0; + r_last_d = 1'b0; end - end else if (su_op_i == READ_SNP_DATA) begin - sample_dec_data = 1'b1; - state_d = SEND_LOWER_HALF; - end - end - end - - SEND_LOWER_HALF: begin - // Prepare request - r_o.data = fifo_data_out; - r_o.id = ccu_req_holder_q.ar.id; - r_o.resp[3] = shared_q; // update if shared - r_o.resp[2] = dirty_q; // update if any line dirty - - if (!fifo_empty) begin - // Single data request - if (ccu_req_holder_q.ar.len == 0) begin - // The lower 64 bits are required - if (!ar_addr_offset) begin - r_o.last = 1'b1; - r_valid_o = 1'b1; // There is something to send - if (r_ready_i) begin - state_d = WAIT_CD_LAST; + end else begin + // Single data request + if (ccu_req_holder_d.ar.len == 0) begin + // The lower 64 bits are required + if (!ar_addr_offset) begin + r_o.last = 1'b1; + r_valid_o = !fifo_empty; // There is something to send + if (r_ready_i && !fifo_empty) begin + fifo_pop = 1'b1; + su_busy_d = 1'b0; + end + end else begin + // The lower 64 bits are not needed + // Consume them and move the upper 64 bits 
+ r_last_d = 1'b1; fifo_pop = 1'b1; end end else begin - // The lower 64 bits are not needed - // Consume them and move the upper 64 bits - state_d = SEND_UPPER_HALF; - fifo_pop = 1'b1; - end - end else begin - // Full cacheline request - r_o.last = 1'b0; - r_valid_o = 1'b1; // There is something to send - if (r_ready_i) begin - state_d = SEND_UPPER_HALF; - fifo_pop = 1'b1; + // Full cacheline request + r_valid_o = !fifo_empty; // There is something to send + if (r_ready_i && !fifo_empty) begin + fifo_pop = 1'b1; + r_last_d = 1'b1; + end end end end - end - - SEND_UPPER_HALF: begin - // Prepare request - r_o.data = fifo_data_out; - r_o.id = ccu_req_holder_q.ar.id; - r_o.resp[3] = shared_q; // Update if shared - r_o.resp[2] = dirty_q; // Update if any line dirty - r_o.last = 1'b1; // No further transactions - - if (!fifo_empty) begin - r_valid_o = 1'b1; - + SEND_INVALID_ACK_R: begin + r_o = '0; + r_o.id = ccu_req_holder_d.ar.id; + r_o.last = 'b1; + r_valid_o = 'b1; if (r_ready_i) begin - fifo_pop = 1'b1; - state_d = IDLE; + su_busy_d = 1'b0; end end - end - - WAIT_R_READY: begin - r_o = '0; - r_o.id = ccu_req_holder_q.ar.id; - r_o.last = 'b1; - r_valid_o = 'b1; - - if (r_ready_i) - state_d = IDLE; - end - endcase + endcase + end end assign fifo_push = cd_handshake_i; -assign fifo_flush = 1'b0; +assign fifo_flush = !(su_req_i || su_busy_q); assign fifo_data_in = cd_i.data; fifo_v3 #( - .FALL_THROUGH(0), + .FALL_THROUGH(1), .DATA_WIDTH(AxiDataWidth), .DEPTH(FIFO_DEPTH) ) cd_snoop_fifo_i ( From 912cadae616a82867ebd582792f804486fa752b2 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 13 May 2024 16:37:44 +0200 Subject: [PATCH 027/109] Add performance events --- src/ace_ccu_top.sv | 8 ++++++ src/ccu_ctrl.sv | 13 ++++++--- src/ccu_ctrl_decoder.sv | 58 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 74 insertions(+), 5 deletions(-) diff --git a/src/ace_ccu_top.sv b/src/ace_ccu_top.sv index ff47c48..ea7048d 100644 --- a/src/ace_ccu_top.sv +++ 
b/src/ace_ccu_top.sv @@ -55,6 +55,7 @@ module ace_ccu_top input logic clk_i, input logic rst_ni, input logic test_i, + output logic [Cfg.NoSlvPorts-1:0][7:0] perf_evt_o, input slv_req_t [Cfg.NoSlvPorts-1:0] slv_ports_req_i, output slv_resp_t [Cfg.NoSlvPorts-1:0] slv_ports_resp_o, output snoop_req_t [Cfg.NoSlvPorts-1:0] slv_snp_req_o, @@ -224,6 +225,10 @@ axi_mux #( .mst_resp_i ( ccu_resps_mux_i ) ); +logic [7:0] perf_evt_temp; +for (genvar i = 0; i < Cfg.NoSlvPorts; i++) + assign perf_evt_o[i] = perf_evt_temp; + ccu_ctrl #( .DcacheLineWidth ( Cfg.DcacheLineWidth ), .DCacheIndexWidth( Cfg.DCacheIndexWidth ), @@ -253,6 +258,7 @@ ccu_ctrl #( ) ccu_ctrl_i ( .clk_i, .rst_ni, + .perf_evt_o ( perf_evt_temp ), .ccu_req_i ( ccu_reqs_mux_o ), .ccu_resp_o ( ccu_resps_mux_i ), .ccu_req_o ( ccu_reqs_o ), @@ -274,6 +280,7 @@ module ace_ccu_top_intf input logic clk_i, input logic rst_ni, input logic test_i, + output logic [Cfg.NoSlvPorts-1:0][7:0] perf_evt_o, SNOOP_BUS.Slave snoop_ports [Cfg.NoSlvPorts-1:0], ACE_BUS.Slave slv_ports [Cfg.NoSlvPorts-1:0], AXI_BUS.Master mst_ports @@ -383,6 +390,7 @@ module ace_ccu_top_intf .clk_i, .rst_ni, .test_i, + .perf_evt_o, .slv_ports_req_i ( slv_ace_reqs ), .slv_ports_resp_o ( slv_ace_resps ), .slv_snp_req_o ( snoop_reqs ), diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 346c2ad..4948223 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -44,7 +44,9 @@ module ccu_ctrl import ccu_ctrl_pkg::*; import axi_pkg::*; input mst_resp_t ccu_resp_i, // Snoop channel resuest and response output snoop_req_t [NoMstPorts-1:0] s2m_req_o, - input snoop_resp_t [NoMstPorts-1:0] m2s_resp_i + input snoop_resp_t [NoMstPorts-1:0] m2s_resp_i, + // Perf counters + output logic [7:0] perf_evt_o ); localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth; @@ -189,7 +191,10 @@ ccu_ctrl_decoder #( .b_queue_push_o (b_queue_push), .r_queue_push_o (r_queue_push), .b_queue_aw_o (b_queue_aw), - .r_queue_ar_o (r_queue_ar) + .r_queue_ar_o (r_queue_ar), 
+ + .perf_evt_o (perf_evt_o) + ); ccu_ctrl_snoop_unit #( @@ -381,7 +386,7 @@ assign r_inp_req = r_queue_push; id_queue #( .ID_WIDTH (SlvAxiIDWidth+1), - .CAPACITY (4), + .CAPACITY (6), .FULL_BW (1), .data_t (id_queue_data_t) ) b_id_queue ( @@ -409,7 +414,7 @@ id_queue #( id_queue #( .ID_WIDTH (SlvAxiIDWidth+1), - .CAPACITY (4), + .CAPACITY (6), .FULL_BW (1), .data_t (id_queue_data_t) ) r_id_queue ( diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index f74ee71..3f9f307 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -5,6 +5,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; parameter int unsigned AxiAddrWidth = 0, parameter int unsigned NoMstPorts = 4, parameter int unsigned SlvAxiIDWidth = 0, + parameter bit PerfCounters = 1, parameter type slv_aw_chan_t = logic, parameter type w_chan_t = logic, parameter type slv_b_chan_t = logic, @@ -57,7 +58,9 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; input logic b_collision_i, input logic r_collision_i, - input logic cd_fifo_stall_i + input logic cd_fifo_stall_i, + + output logic [7:0] perf_evt_o ); logic [NoMstPorts-1:0] ac_initiator; @@ -552,4 +555,57 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; .pop_i (aw_fifo_pop) ); + if (PerfCounters) begin : gen_perf_events + logic perf_snoop_hit; + logic perf_snoop_miss; + logic perf_writeback; + logic perf_collision_cycles; + logic perf_collision_req; + logic perf_generic_stall; + logic perf_ac_busy_stall; + logic perf_mu_stall; + logic generic_stall; + + logic collision_req_observed_q, collision_req_observed_d; + + assign generic_stall = |{ + cr_cmd_fifo_full, + cd_fifo_stall_i, + arb_idx_out == 0 && (r_queue_full_i || ar_fifo_full), + arb_idx_out == 1 && (b_queue_full_i || aw_fifo_full) + }; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + collision_req_observed_q <= '0; + end else begin + collision_req_observed_q <= collision_req_observed_d; + end + end + + // Perf counters + assign perf_snoop_hit = 
su_req_o && su_gnt_i && cr_cmd_fifo_out == RESP_R && su_op_o == READ_SNP_DATA; + assign perf_snoop_miss = mu_req_o && mu_gnt_i && cr_cmd_fifo_out == RESP_R && mu_op_o == SEND_AXI_REQ_R; + assign perf_writeback = mu_req_o && mu_gnt_i && mu_op_o inside {SEND_AXI_REQ_WRITE_BACK_W, SEND_AXI_REQ_WRITE_BACK_R}; + assign perf_collision_cycles = ac_state_q == AC_IDLE && arb_req_out && !generic_stall && collision; + assign perf_collision_req = perf_collision_cycles && !collision_req_observed_q; + assign perf_generic_stall = ac_state_q == AC_IDLE && arb_req_out && generic_stall; + assign perf_ac_busy_stall = ac_state_q == AC_BUSY && arb_req_out; + assign perf_mu_stall = mu_req_o && !mu_gnt_i; + + assign perf_evt_o = { + perf_snoop_hit, + perf_snoop_miss, + perf_writeback, + perf_collision_cycles, + perf_collision_req, + perf_generic_stall, + perf_ac_busy_stall, + perf_mu_stall + }; + + assign collision_req_observed_d = perf_collision_cycles; + end else begin + assign perf_evt_o = '0; + end endmodule \ No newline at end of file From 13f79f9543da7a9e8e0ced302d71b12241193bc2 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Tue, 14 May 2024 21:39:39 +0200 Subject: [PATCH 028/109] Allow multiple consecutive AC requests --- src/ccu_ctrl_decoder.sv | 82 +++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 49 deletions(-) diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index 3f9f307..1c20686 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -63,8 +63,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; output logic [7:0] perf_evt_o ); - logic [NoMstPorts-1:0] ac_initiator; - logic [NoMstPorts-1:0] ac_handshake_q, ac_handshake; + logic [NoMstPorts-1:0] ac_handshake_q, ac_handshake_d, ac_handshake; logic [NoMstPorts-1:0] cr_aw_initiator, cr_ar_initiator; logic [NoMstPorts-1:0] cr_handshake_q, cr_handshake_d, cr_handshake; @@ -72,6 +71,8 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; typedef enum logic [1:0] { 
INVALID_W, INVALID_R, RESP_R } cr_cmd_fifo_t; logic stall; + logic ac_done; + logic cr_done; // AW FIFO logic aw_fifo_empty, aw_fifo_full; @@ -93,8 +94,6 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; AC_BUSY } ac_state_d, ac_state_q; - logic decode_r, decode_w; - // Hold incoming ACE request slv_aw_chan_t aw_holder; @@ -200,7 +199,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; // AW requests, ID queue or FIFO full arb_idx_out == 1 && (b_queue_full_i || aw_fifo_full), // AC is busy - ac_state_q == AC_BUSY + ac_state_q == AC_BUSY && !ac_done }; assign arb_gnt_out = !stall; assign lookup_req_o = arb_req_out; @@ -214,28 +213,21 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; assign cr_handshake[i] = m2s_resp_i[i].cr_valid & s2m_req_o[i].cr_ready; end - logic cr_done; - snoop_ac_t [NoMstPorts-1:0] ac_out; logic [NoMstPorts-1:0] ac_out_valid; logic [NoMstPorts-1:0] cr_out_ready; // Hold snoop AC handshakes - for (genvar i = 0; i < NoMstPorts; i = i + 1) begin - always_ff @ (posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - ac_handshake_q[i] <= '0; - end else if(decode_r || decode_w) begin - ac_handshake_q[i] <= ac_initiator[i]; - end else if(ac_state_q == AC_BUSY) begin - if (ac_handshake[i]) - ac_handshake_q[i] <= 1'b1; - end else begin - ac_handshake_q[i] <= '0; - end + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + ac_handshake_q <= '0; + end else begin + ac_handshake_q <= ac_handshake_d; end end + assign ac_done = (ac_handshake_q | ac_handshake) == '1; + // Hold snoop CR handshakes logic [NoMstPorts-1:0] data_available_q, response_error_q, shared_q, dirty_q; logic [NoMstPorts-1:0] data_available_d, response_error_d, shared_d, dirty_d; @@ -336,43 +328,35 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; // Next state ac_state_d = ac_state_q; - - // Ctrl flags - decode_r = 1'b0; - decode_w = 1'b0; + ac_handshake_d = ac_handshake_q; cr_cmd_fifo_in = RESP_R; aw_fifo_push = 1'b0; ar_fifo_push = 1'b0; - ac_initiator = '0; 
- - case (ac_state_q) - AC_IDLE: begin - if (arb_req_out && !stall) begin - ac_d = arb_ac_out; - ac_state_d = AC_BUSY; - if (arb_idx_out == 1) begin - decode_w = 1'b1; - aw_fifo_push = 1'b1; - cr_cmd_fifo_in = INVALID_W; - ac_initiator = aw_initiator; - end else if (arb_idx_out == 0) begin - decode_r = 1'b1; - ar_fifo_push = 1'b1; - cr_cmd_fifo_in = send_invalid_r ? INVALID_R : RESP_R; - ac_initiator = ar_initiator; - end - end - end + if (ac_state_q == AC_BUSY) begin + ac_out_valid = ~ac_handshake_q; + ac_handshake_d = ac_handshake | ac_handshake_q; + end - AC_BUSY: begin - if ((ac_handshake_q | ac_handshake) == '1) begin - ac_state_d = AC_IDLE; + if (ac_state_q == AC_IDLE || ac_done) begin + if (arb_req_out && !stall) begin + ac_d = arb_ac_out; + ac_state_d = AC_BUSY; + if (arb_idx_out == 1) begin + aw_fifo_push = 1'b1; + cr_cmd_fifo_in = INVALID_W; + ac_handshake_d = aw_initiator; + end else if (arb_idx_out == 0) begin + ar_fifo_push = 1'b1; + cr_cmd_fifo_in = send_invalid_r ? INVALID_R : RESP_R; + ac_handshake_d = ar_initiator; end - ac_out_valid = ~ac_handshake_q; + end else begin + ac_state_d = AC_IDLE; + ac_handshake_d = '0; end - endcase + end end assign ac_out = {NoMstPorts{ac_q}}; @@ -590,7 +574,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; assign perf_collision_cycles = ac_state_q == AC_IDLE && arb_req_out && !generic_stall && collision; assign perf_collision_req = perf_collision_cycles && !collision_req_observed_q; assign perf_generic_stall = ac_state_q == AC_IDLE && arb_req_out && generic_stall; - assign perf_ac_busy_stall = ac_state_q == AC_BUSY && arb_req_out; + assign perf_ac_busy_stall = ac_state_q == AC_BUSY && arb_req_out && !ac_done; assign perf_mu_stall = mu_req_o && !mu_gnt_i; assign perf_evt_o = { From 8de8e7c6877a835d22ea598bf7f61d4c0891e7c4 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Tue, 14 May 2024 21:46:55 +0200 Subject: [PATCH 029/109] Replace enum for AC state --- src/ccu_ctrl_decoder.sv | 39 
++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index 1c20686..7e5191b 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -89,10 +89,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; logic cr_cmd_fifo_pop, cr_cmd_fifo_push; cr_cmd_fifo_t cr_cmd_fifo_in, cr_cmd_fifo_out; - enum { - AC_IDLE, - AC_BUSY - } ac_state_d, ac_state_q; + logic ac_busy_q, ac_busy_d; // Hold incoming ACE request @@ -199,7 +196,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; // AW requests, ID queue or FIFO full arb_idx_out == 1 && (b_queue_full_i || aw_fifo_full), // AC is busy - ac_state_q == AC_BUSY && !ac_done + ac_busy_q && !ac_done }; assign arb_gnt_out = !stall; assign lookup_req_o = arb_req_out; @@ -305,15 +302,15 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; // ---------------------- always_ff @(posedge clk_i, negedge rst_ni) begin if(!rst_ni) begin - ac_state_q <= AC_IDLE; - ac_q <= '0; - mu_done_q <= '0; - su_done_q <= '0; + ac_busy_q <= '0; + ac_q <= '0; + mu_done_q <= '0; + su_done_q <= '0; end else begin - ac_state_q <= ac_state_d; - ac_q <= ac_d; - mu_done_q <= mu_done_d; - su_done_q <= su_done_d; + ac_busy_q <= ac_busy_d; + ac_q <= ac_d; + mu_done_q <= mu_done_d; + su_done_q <= su_done_d; end end @@ -327,22 +324,22 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; ac_out_valid = '0; // Next state - ac_state_d = ac_state_q; + ac_busy_d = ac_busy_q; ac_handshake_d = ac_handshake_q; cr_cmd_fifo_in = RESP_R; aw_fifo_push = 1'b0; ar_fifo_push = 1'b0; - if (ac_state_q == AC_BUSY) begin + if (ac_busy_q) begin ac_out_valid = ~ac_handshake_q; ac_handshake_d = ac_handshake | ac_handshake_q; end - if (ac_state_q == AC_IDLE || ac_done) begin + if (!ac_busy_q || ac_done) begin if (arb_req_out && !stall) begin ac_d = arb_ac_out; - ac_state_d = AC_BUSY; + ac_busy_d = 1'b1; if (arb_idx_out == 1) begin aw_fifo_push = 1'b1; cr_cmd_fifo_in = INVALID_W; @@ 
-353,7 +350,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; ac_handshake_d = ar_initiator; end end else begin - ac_state_d = AC_IDLE; + ac_busy_d = 1'b0; ac_handshake_d = '0; end end @@ -571,10 +568,10 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; assign perf_snoop_hit = su_req_o && su_gnt_i && cr_cmd_fifo_out == RESP_R && su_op_o == READ_SNP_DATA; assign perf_snoop_miss = mu_req_o && mu_gnt_i && cr_cmd_fifo_out == RESP_R && mu_op_o == SEND_AXI_REQ_R; assign perf_writeback = mu_req_o && mu_gnt_i && mu_op_o inside {SEND_AXI_REQ_WRITE_BACK_W, SEND_AXI_REQ_WRITE_BACK_R}; - assign perf_collision_cycles = ac_state_q == AC_IDLE && arb_req_out && !generic_stall && collision; + assign perf_collision_cycles = !ac_busy_q && arb_req_out && !generic_stall && collision; assign perf_collision_req = perf_collision_cycles && !collision_req_observed_q; - assign perf_generic_stall = ac_state_q == AC_IDLE && arb_req_out && generic_stall; - assign perf_ac_busy_stall = ac_state_q == AC_BUSY && arb_req_out && !ac_done; + assign perf_generic_stall = !ac_busy_q && arb_req_out && generic_stall; + assign perf_ac_busy_stall = ac_busy_q && arb_req_out && !ac_done; assign perf_mu_stall = mu_req_o && !mu_gnt_i; assign perf_evt_o = { From 74333a32e61b8144b6f1eca7c9740f7f7c0828ce Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Tue, 14 May 2024 22:38:48 +0200 Subject: [PATCH 030/109] Add perf counters to memory unit --- src/ccu_ctrl.sv | 6 ++++-- src/ccu_ctrl_memory_unit.sv | 42 ++++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 4948223..54c9d0f 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -193,7 +193,7 @@ ccu_ctrl_decoder #( .b_queue_aw_o (b_queue_aw), .r_queue_ar_o (r_queue_ar), - .perf_evt_o (perf_evt_o) + .perf_evt_o () ); @@ -277,7 +277,9 @@ ccu_ctrl_memory_unit #( .mu_gnt_o (mu_gnt), .mu_req_i (mu_req), .mu_op_i (mu_op), - .first_responder_i (dec_first_responder) + 
.first_responder_i (dec_first_responder), + + .perf_evt_o (perf_evt_o) ); /////////////////// diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index afb2c58..e4bcf89 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -4,6 +4,7 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; parameter int unsigned AxiDataWidth = 0, parameter int unsigned NoMstPorts = 4, parameter int unsigned SlvAxiIDWidth = 0, + parameter bit PerfCounters = 1, parameter type mst_aw_chan_t = logic, parameter type w_chan_t = logic, parameter type mst_b_chan_t = logic, @@ -44,7 +45,9 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; output logic mu_gnt_o, input logic mu_req_i, input mu_op_e mu_op_i, - input logic [MstIdxBits-1:0] first_responder_i + input logic [MstIdxBits-1:0] first_responder_i, + + output logic [7:0] perf_evt_o ); localparam CD_FIFO_DEPTH = 2; @@ -353,5 +356,42 @@ axi_fifo #( .mst_resp_i (ccu_resp_i) ); +if (PerfCounters) begin : gen_perf_events + + logic perf_send_axi_req_r; + logic perf_send_axi_req_write_back_r; + logic perf_send_axi_req_w; + logic perf_send_axi_req_write_back_w; + logic perf_amo_wait_read; + logic perf_amo_wait_wb_r; + logic perf_amo_wait_wb_w; + logic perf_w_fifo_full; + + logic ungranted_request; + assign ungranted_request = mu_req_i && !mu_gnt_o; + + assign perf_send_axi_req_r = ungranted_request && ax_op_q == SEND_AXI_REQ_R; + assign perf_send_axi_req_write_back_r = ungranted_request && ax_op_q == SEND_AXI_REQ_WRITE_BACK_R; + assign perf_send_axi_req_w = ungranted_request && ax_op_q == SEND_AXI_REQ_W; + assign perf_send_axi_req_write_back_w = ungranted_request && ax_op_q == SEND_AXI_REQ_WRITE_BACK_W; + assign perf_amo_wait_read = ungranted_request && ax_op_q == AMO_WAIT_READ; + assign perf_amo_wait_wb_r = ungranted_request && ax_op_q == AMO_WAIT_WB_R; + assign perf_amo_wait_wb_w = ungranted_request && ax_op_q == AMO_WAIT_WB_W; + assign perf_w_fifo_full = ungranted_request && w_fifo_full; + + assign 
perf_evt_o = { + perf_send_axi_req_r, + perf_send_axi_req_write_back_r, + perf_send_axi_req_w, + perf_send_axi_req_write_back_w, + perf_amo_wait_read, + perf_amo_wait_wb_r, + perf_amo_wait_wb_w, + perf_w_fifo_full + }; +end else begin + assign perf_evt_o = '0; +end + endmodule \ No newline at end of file From 9c9c764ae1107eea4f35ca5c271e471e91d4f4ef Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 16 May 2024 15:00:12 +0200 Subject: [PATCH 031/109] Remove AMO_WAIT_READ state --- src/ccu_ctrl_memory_unit.sv | 23 ++++++----------------- src/ccu_ctrl_pkg.sv | 1 - 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index e4bcf89..05621fa 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -50,7 +50,7 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; output logic [7:0] perf_evt_o ); -localparam CD_FIFO_DEPTH = 2; +localparam CD_FIFO_DEPTH = 4; localparam AXI_FIFO_DEPTH = 0; // Passthrough localparam W_FIFO_DEPTH = 2; @@ -160,13 +160,7 @@ always_comb begin if (ccu_resp_in.aw_ready && !w_fifo_full) begin w_fifo_push = 1'b1; - if (ccu_req_holder_d.aw.atop[5]) begin - // Blocking behavior for AMO operations - // TODO: check if truly needed - ax_op_d = AMO_WAIT_READ; - end else begin - ax_busy_d = 1'b0; - end + ax_busy_d = 1'b0; end end SEND_AXI_REQ_WRITE_BACK_W: begin @@ -194,11 +188,6 @@ always_comb begin ax_op_d = SEND_AXI_REQ_W; end end - AMO_WAIT_READ: begin - if(ccu_resp_in.r_valid && ccu_req_i.r_ready && ccu_resp_in.r.last - && ccu_resp_in.r.id == ccu_req_holder_q.aw.id) - ax_busy_d = 1'b0; - end AMO_WAIT_WB_R: begin if(ccu_resp_in.b_valid && ccu_req_out.b_ready && ccu_resp_in.b.id == {1'b1, first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}) @@ -362,7 +351,7 @@ if (PerfCounters) begin : gen_perf_events logic perf_send_axi_req_write_back_r; logic perf_send_axi_req_w; logic perf_send_axi_req_write_back_w; - logic perf_amo_wait_read; + logic 
perf_cd_fifo_full; logic perf_amo_wait_wb_r; logic perf_amo_wait_wb_w; logic perf_w_fifo_full; @@ -374,19 +363,19 @@ if (PerfCounters) begin : gen_perf_events assign perf_send_axi_req_write_back_r = ungranted_request && ax_op_q == SEND_AXI_REQ_WRITE_BACK_R; assign perf_send_axi_req_w = ungranted_request && ax_op_q == SEND_AXI_REQ_W; assign perf_send_axi_req_write_back_w = ungranted_request && ax_op_q == SEND_AXI_REQ_WRITE_BACK_W; - assign perf_amo_wait_read = ungranted_request && ax_op_q == AMO_WAIT_READ; assign perf_amo_wait_wb_r = ungranted_request && ax_op_q == AMO_WAIT_WB_R; assign perf_amo_wait_wb_w = ungranted_request && ax_op_q == AMO_WAIT_WB_W; - assign perf_w_fifo_full = ungranted_request && w_fifo_full; + assign perf_cd_fifo_full = cd_fifo_full_o; + assign perf_w_fifo_full = w_fifo_full; assign perf_evt_o = { perf_send_axi_req_r, perf_send_axi_req_write_back_r, perf_send_axi_req_w, perf_send_axi_req_write_back_w, - perf_amo_wait_read, perf_amo_wait_wb_r, perf_amo_wait_wb_w, + perf_cd_fifo_full, perf_w_fifo_full }; end else begin diff --git a/src/ccu_ctrl_pkg.sv b/src/ccu_ctrl_pkg.sv index 6e0f643..4061a39 100644 --- a/src/ccu_ctrl_pkg.sv +++ b/src/ccu_ctrl_pkg.sv @@ -5,7 +5,6 @@ package ccu_ctrl_pkg; SEND_AXI_REQ_WRITE_BACK_R, SEND_AXI_REQ_W, SEND_AXI_REQ_WRITE_BACK_W, - AMO_WAIT_READ, AMO_WAIT_WB_R, AMO_WAIT_WB_W } mu_op_e; From 63f31146b82f3c47b36b9771badc9e89333fb1e6 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 23 May 2024 17:49:57 +0200 Subject: [PATCH 032/109] Disable perf counters by default and buffer them --- src/ccu_ctrl.sv | 12 +++++++++++- src/ccu_ctrl_decoder.sv | 2 +- src/ccu_ctrl_memory_unit.sv | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 54c9d0f..8ea4d3e 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -49,6 +49,8 @@ module ccu_ctrl import ccu_ctrl_pkg::*; import axi_pkg::*; output logic [7:0] perf_evt_o ); +logic [7:0] perf_evt; + localparam int unsigned 
DcacheLineWords = DcacheLineWidth / AxiDataWidth; localparam int unsigned DCacheByteOffset = $clog2(DcacheLineWidth/8); localparam int unsigned MstIdxBits = $clog2(NoMstPorts); @@ -279,7 +281,7 @@ ccu_ctrl_memory_unit #( .mu_op_i (mu_op), .first_responder_i (dec_first_responder), - .perf_evt_o (perf_evt_o) + .perf_evt_o (perf_evt) ); /////////////////// @@ -558,4 +560,12 @@ end assign cd_first_responder = cd[cd_first_responder_out]; assign cd_handshake = cd_valid[cd_first_responder_out] && cd_ready[cd_first_responder_out]; +always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + perf_evt_o <= '0; + end else begin + perf_evt_o <= perf_evt; + end +end + endmodule diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index 7e5191b..b60f7d3 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -5,7 +5,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; parameter int unsigned AxiAddrWidth = 0, parameter int unsigned NoMstPorts = 4, parameter int unsigned SlvAxiIDWidth = 0, - parameter bit PerfCounters = 1, + parameter bit PerfCounters = 0, parameter type slv_aw_chan_t = logic, parameter type w_chan_t = logic, parameter type slv_b_chan_t = logic, diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu_ctrl_memory_unit.sv index 05621fa..6dc8607 100644 --- a/src/ccu_ctrl_memory_unit.sv +++ b/src/ccu_ctrl_memory_unit.sv @@ -4,7 +4,7 @@ module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; parameter int unsigned AxiDataWidth = 0, parameter int unsigned NoMstPorts = 4, parameter int unsigned SlvAxiIDWidth = 0, - parameter bit PerfCounters = 1, + parameter bit PerfCounters = 0, parameter type mst_aw_chan_t = logic, parameter type w_chan_t = logic, parameter type mst_b_chan_t = logic, From 86be0f2e2e89e38b818079312fb42234309807f9 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 23 May 2024 22:47:44 +0200 Subject: [PATCH 033/109] Retime AC request handling --- src/ccu_ctrl_decoder.sv | 105 ++++++++++++++++++++++------------------ 1 
file changed, 58 insertions(+), 47 deletions(-) diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index b60f7d3..9919f6a 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -70,8 +70,8 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; typedef enum logic [1:0] { INVALID_W, INVALID_R, RESP_R } cr_cmd_fifo_t; - logic stall; - logic ac_done; + logic generic_stall; + logic ac_ctrl_ready; logic cr_done; // AW FIFO @@ -130,7 +130,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; spill_register #( .T (slv_aw_chan_t), - .Bypass (1'b1) + .Bypass (1'b0) ) aw_spill_register ( .clk_i, .rst_ni, @@ -144,7 +144,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; spill_register #( .T (slv_ar_chan_t), - .Bypass (1'b1) + .Bypass (1'b0) ) ar_spill_register ( .clk_i, .rst_ni, @@ -184,9 +184,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; .idx_o ( arb_idx_out ) ); - assign stall = |{ - // Collission on address - collision, + assign generic_stall = |{ // CR CMD FIFO full cr_cmd_fifo_full, // CD CMD FIFO full @@ -194,11 +192,9 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; // AR requests, ID queue or FIFO full arb_idx_out == 0 && (r_queue_full_i || ar_fifo_full), // AW requests, ID queue or FIFO full - arb_idx_out == 1 && (b_queue_full_i || aw_fifo_full), - // AC is busy - ac_busy_q && !ac_done + arb_idx_out == 1 && (b_queue_full_i || aw_fifo_full) }; - assign arb_gnt_out = !stall; + assign arb_gnt_out = !generic_stall && !collision && ac_ctrl_ready; assign lookup_req_o = arb_req_out; assign lookup_addr_o = arb_idx_out == 1 ? 
axi_pkg::aligned_addr(aw_holder.addr,aw_holder.size): @@ -210,7 +206,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; assign cr_handshake[i] = m2s_resp_i[i].cr_valid & s2m_req_o[i].cr_ready; end - snoop_ac_t [NoMstPorts-1:0] ac_out; + snoop_ac_t ac_out; logic [NoMstPorts-1:0] ac_out_valid; logic [NoMstPorts-1:0] cr_out_ready; @@ -223,8 +219,6 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; end end - assign ac_done = (ac_handshake_q | ac_handshake) == '1; - // Hold snoop CR handshakes logic [NoMstPorts-1:0] data_available_q, response_error_q, shared_q, dirty_q; logic [NoMstPorts-1:0] data_available_d, response_error_d, shared_d, dirty_d; @@ -320,8 +314,11 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; always_comb begin + ac_ctrl_ready = 1'b0; + ac_d = ac_q; ac_out_valid = '0; + ac_out = ac_q; // Next state ac_busy_d = ac_busy_q; @@ -331,33 +328,55 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; aw_fifo_push = 1'b0; ar_fifo_push = 1'b0; - if (ac_busy_q) begin - ac_out_valid = ~ac_handshake_q; - ac_handshake_d = ac_handshake | ac_handshake_q; - end - - if (!ac_busy_q || ac_done) begin - if (arb_req_out && !stall) begin - ac_d = arb_ac_out; - ac_busy_d = 1'b1; - if (arb_idx_out == 1) begin - aw_fifo_push = 1'b1; - cr_cmd_fifo_in = INVALID_W; - ac_handshake_d = aw_initiator; - end else if (arb_idx_out == 0) begin - ar_fifo_push = 1'b1; - cr_cmd_fifo_in = send_invalid_r ? INVALID_R : RESP_R; - ac_handshake_d = ar_initiator; - end - end else begin - ac_busy_d = 1'b0; + case (ac_busy_q) + 1'b0: begin + ac_ctrl_ready = 1'b1; + ac_out = arb_ac_out; ac_handshake_d = '0; + if (arb_req_out && !generic_stall && !collision) begin + ac_d = arb_ac_out; + if (arb_idx_out == 1) begin + aw_fifo_push = 1'b1; + cr_cmd_fifo_in = INVALID_W; + ac_handshake_d = ac_handshake | aw_initiator; + ac_out_valid = ~aw_initiator; + ac_busy_d = (ac_handshake | aw_initiator) != '1; + end else if (arb_idx_out == 0) begin + ar_fifo_push = 1'b1; + cr_cmd_fifo_in = send_invalid_r ? 
INVALID_R : RESP_R; + ac_handshake_d = ac_handshake | ar_initiator; + ac_out_valid = ~ar_initiator; + ac_busy_d = (ac_handshake | ar_initiator) != '1; + end + end end - end + 1'b1: begin + ac_out_valid = ~ac_handshake_q; + ac_handshake_d = ac_handshake | ac_handshake_q; + ac_out = ac_q; + if ((ac_handshake | ac_handshake_q) == '1) begin + ac_ctrl_ready = 1'b1; + if (arb_req_out && !generic_stall && !collision) begin + ac_d = arb_ac_out; + ac_busy_d = 1'b1; + if (arb_idx_out == 1) begin + aw_fifo_push = 1'b1; + cr_cmd_fifo_in = INVALID_W; + ac_handshake_d = aw_initiator; + end else if (arb_idx_out == 0) begin + ar_fifo_push = 1'b1; + cr_cmd_fifo_in = send_invalid_r ? INVALID_R : RESP_R; + ac_handshake_d = ar_initiator; + end + end else begin + ac_busy_d = 1'b0; + ac_handshake_d = '0; + end + end + end + endcase end - assign ac_out = {NoMstPorts{ac_q}}; - assign cr_aw_initiator = 1 << aw_fifo_out.id[SlvAxiIDWidth+:MstIdxBits]; assign cr_ar_initiator = 1 << ar_fifo_out.id[SlvAxiIDWidth+:MstIdxBits]; @@ -467,7 +486,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; always_comb begin s2m_req_o = '0; for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - s2m_req_o[n].ac = ac_out[n]; + s2m_req_o[n].ac = ac_out; s2m_req_o[n].ac_valid = ac_out_valid[n]; s2m_req_o[n].cr_ready = cr_out_ready[n]; end @@ -545,17 +564,9 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; logic perf_generic_stall; logic perf_ac_busy_stall; logic perf_mu_stall; - logic generic_stall; logic collision_req_observed_q, collision_req_observed_d; - assign generic_stall = |{ - cr_cmd_fifo_full, - cd_fifo_stall_i, - arb_idx_out == 0 && (r_queue_full_i || ar_fifo_full), - arb_idx_out == 1 && (b_queue_full_i || aw_fifo_full) - }; - always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin collision_req_observed_q <= '0; @@ -571,7 +582,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; assign perf_collision_cycles = !ac_busy_q && arb_req_out && !generic_stall && collision; assign 
perf_collision_req = perf_collision_cycles && !collision_req_observed_q; assign perf_generic_stall = !ac_busy_q && arb_req_out && generic_stall; - assign perf_ac_busy_stall = ac_busy_q && arb_req_out && !ac_done; + assign perf_ac_busy_stall = arb_req_out && !ac_ctrl_ready; assign perf_mu_stall = mu_req_o && !mu_gnt_i; assign perf_evt_o = { From 01cc65521e506ddd2dc4e73ff65b09558f48e07b Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Tue, 28 May 2024 11:55:52 +0200 Subject: [PATCH 034/109] Add bypass to `ccu_ctrl_decoder` --- src/ccu_ctrl_decoder.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu_ctrl_decoder.sv index 9919f6a..f26dffd 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu_ctrl_decoder.sv @@ -130,7 +130,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; spill_register #( .T (slv_aw_chan_t), - .Bypass (1'b0) + .Bypass (1'b1) ) aw_spill_register ( .clk_i, .rst_ni, @@ -144,7 +144,7 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; spill_register #( .T (slv_ar_chan_t), - .Bypass (1'b0) + .Bypass (1'b1) ) ar_spill_register ( .clk_i, .rst_ni, From fbaabcf4b657aa4af57655cf9fa93c34f2fdad27 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Tue, 28 May 2024 18:33:14 +0200 Subject: [PATCH 035/109] Freeze latency configuration --- src/ace_ccu_top.sv | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/ace_ccu_top.sv b/src/ace_ccu_top.sv index ea7048d..a2371cc 100644 --- a/src/ace_ccu_top.sv +++ b/src/ace_ccu_top.sv @@ -111,11 +111,11 @@ for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_slv_port_demux .AxiLookBits ( Cfg.AxiIdUsedSlvPorts ), .UniqueIds ( Cfg.UniqueIds ), //.FallThrough ( Cfg.FallThrough ), - .SpillAw ( Cfg.LatencyMode[9] ), - .SpillW ( Cfg.LatencyMode[8] ), - .SpillB ( Cfg.LatencyMode[7] ), - .SpillAr ( Cfg.LatencyMode[6] ), - .SpillR ( Cfg.LatencyMode[5] ) + .SpillAw ( 1 ), + .SpillW ( 0 ), + .SpillB ( 0 ), + .SpillAr ( 1 ), + 
.SpillR ( 0 ) ) i_axi_demux ( .clk_i, // Clock .rst_ni, // Asynchronous reset active low @@ -147,11 +147,11 @@ axi_mux #( .NoSlvPorts ( Cfg.NoSlvPorts + 1 ), // Number of Masters for the modules .MaxWTrans ( Cfg.MaxMstTrans ), .FallThrough ( Cfg.FallThrough ), - .SpillAw ( Cfg.LatencyMode[4] ), - .SpillW ( Cfg.LatencyMode[3] ), - .SpillB ( Cfg.LatencyMode[2] ), - .SpillAr ( Cfg.LatencyMode[1] ), - .SpillR ( Cfg.LatencyMode[0] ) + .SpillAw ( '0 ), + .SpillW ( '0 ), + .SpillB ( '0 ), + .SpillAr ( '0 ), + .SpillR ( '0 ) ) i_axi_mux ( .clk_i, // Clock .rst_ni, // Asynchronous reset active low @@ -210,11 +210,11 @@ axi_mux #( .NoSlvPorts ( Cfg.NoSlvPorts ), // Number of Masters for the modules .MaxWTrans ( Cfg.MaxMstTrans ), .FallThrough ( Cfg.FallThrough ), - .SpillAw ( Cfg.LatencyMode[4] ), - .SpillW ( Cfg.LatencyMode[3] ), - .SpillB ( Cfg.LatencyMode[2] ), - .SpillAr ( Cfg.LatencyMode[1] ), - .SpillR ( Cfg.LatencyMode[0] ) + .SpillAw ( '0 ), + .SpillW ( '0 ), + .SpillB ( '0 ), + .SpillAr ( '0 ), + .SpillR ( '0 ) ) i_ace_mux ( .clk_i, // Clock .rst_ni, // Asynchronous reset active low From 06180ec96531d38dc0c1c2b244bad621c5fe73a5 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 17 Jun 2024 14:44:02 +0200 Subject: [PATCH 036/109] Fix naming convention for package parameters --- src/ace_ccu_top.sv | 2 +- src/ace_pkg.sv | 2 +- src/ccu_ctrl.sv | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ace_ccu_top.sv b/src/ace_ccu_top.sv index a2371cc..2882f27 100644 --- a/src/ace_ccu_top.sv +++ b/src/ace_ccu_top.sv @@ -231,7 +231,7 @@ for (genvar i = 0; i < Cfg.NoSlvPorts; i++) ccu_ctrl #( .DcacheLineWidth ( Cfg.DcacheLineWidth ), - .DCacheIndexWidth( Cfg.DCacheIndexWidth ), + .DcacheIndexWidth( Cfg.DcacheIndexWidth ), .AxiDataWidth ( Cfg.AxiDataWidth ), .AxiAddrWidth ( Cfg.AxiAddrWidth ), .NoMstPorts ( Cfg.NoSlvPorts ), diff --git a/src/ace_pkg.sv b/src/ace_pkg.sv index e4297ff..c9234c5 100644 --- a/src/ace_pkg.sv +++ b/src/ace_pkg.sv @@ -68,7 
+68,7 @@ package ace_pkg; int unsigned AxiDataWidth; int unsigned AxiUserWidth; int unsigned DcacheLineWidth; - int unsigned DCacheIndexWidth; + int unsigned DcacheIndexWidth; } ccu_cfg_t; // transaction type diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index 8ea4d3e..f663128 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -8,7 +8,7 @@ module ccu_ctrl import ccu_ctrl_pkg::*; import axi_pkg::*; #( parameter int unsigned DcacheLineWidth = 0, - parameter int unsigned DCacheIndexWidth = 0, + parameter int unsigned DcacheIndexWidth = 0, parameter int unsigned AxiDataWidth = 0, parameter int unsigned AxiAddrWidth = 0, parameter int unsigned NoMstPorts = 4, @@ -56,7 +56,7 @@ localparam int unsigned DCacheByteOffset = $clog2(DcacheLineWidth/8); localparam int unsigned MstIdxBits = $clog2(NoMstPorts); localparam int unsigned IdQueueDataWidth = CollisionOnSetOnly ? - DCacheIndexWidth : + DcacheIndexWidth : AxiAddrWidth - DCacheByteOffset; typedef logic [IdQueueDataWidth-1:0] id_queue_data_t; From 409b1bfda1d444a7d61450ab521547f8b5797e01 Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Wed, 10 Jul 2024 18:19:33 +0200 Subject: [PATCH 037/109] Add one cycle latency in id_queues to cut timing loops. 
--- src/ccu_ctrl.sv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ccu_ctrl.sv b/src/ccu_ctrl.sv index f663128..b0e1ff2 100644 --- a/src/ccu_ctrl.sv +++ b/src/ccu_ctrl.sv @@ -392,6 +392,7 @@ id_queue #( .ID_WIDTH (SlvAxiIDWidth+1), .CAPACITY (6), .FULL_BW (1), + .CUT_OUP_POP_INP_GNT (1), .data_t (id_queue_data_t) ) b_id_queue ( .clk_i, @@ -420,6 +421,7 @@ id_queue #( .ID_WIDTH (SlvAxiIDWidth+1), .CAPACITY (6), .FULL_BW (1), + .CUT_OUP_POP_INP_GNT (1), .data_t (id_queue_data_t) ) r_id_queue ( .clk_i, From 090a5f2511812e1258c0c6ed855a7ebb49c844d7 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 18 Jun 2025 12:01:54 +0200 Subject: [PATCH 038/109] treewide: reorganize repository and add reworked CCU Co-Authored-By: Aleksi Korsman <34690092+Roenski@users.noreply.github.com> --- Bender.yml | 60 +- Makefile | 56 + README.md | 52 +- include/ace/convert.svh | 128 ++ include/ace/domain.svh | 44 + include/ace/typedef.svh | 19 +- lint/Makefile | 28 + lint/verible.waiver | 3 + lint/verible_rules.cfg | 5 + scripts/run_vsim.sh | 58 +- scripts/snoop_types.do | 23 + scripts/tb_ace_ccu_top.do | 41 + scripts/tb_ccu_ctrl_r_snoop.do | 26 + scripts/tb_ccu_ctrl_wr_snoop.do | 26 + src/ace_ccu_top.sv | 402 ----- src/ace_cut.sv | 250 +++ src/ace_intf.sv | 462 ++--- src/ace_pkg.sv | 425 ++++- src/ace_snoop_cut.sv | 160 ++ src/ace_trs_dec.sv | 41 - src/ccu/ace_ccu_ax_arbiter.sv | 175 ++ src/ccu/ace_ccu_cd_arbiter.sv | 145 ++ src/ccu/ace_ccu_cd_ctrl.sv | 235 +++ src/ccu/ace_ccu_frontend.sv | 256 +++ src/ccu/ace_ccu_pkg.sv | 94 + src/ccu/ace_ccu_read.sv | 101 ++ src/ccu/ace_ccu_snoop_pipe.sv | 335 ++++ src/ccu/ace_ccu_top.sv | 713 ++++++++ src/ccu/ace_ccu_tracker.sv | 230 +++ src/ccu/ace_ccu_write.sv | 243 +++ src/{ => ccu/deprecated}/ccu_ctrl.sv | 0 src/{ => ccu/deprecated}/ccu_ctrl_decoder.sv | 6 +- .../deprecated}/ccu_ctrl_memory_unit.sv | 0 src/{ => ccu/deprecated}/ccu_ctrl_pkg.sv | 0 .../deprecated}/ccu_ctrl_snoop_unit.sv | 0 src/deprecated/ace_sim_master.sv | 1543 
+++++++++++++++++ src/{ => deprecated}/ace_test.sv | 282 ++- src/{ => deprecated}/snoop_test.sv | 56 +- src/snoop_intf.sv | 196 ++- src/snoop_pkg.sv | 44 - test/tb_ace_ccu_snoop_interconnect.sv | 229 +++ test/tb_ace_ccu_top.sv | 829 ++++----- test/tb_ace_ccu_top_old.sv | 536 ++++++ test/tb_ccu_ctrl_r_snoop.sv | 268 +++ test/tb_ccu_ctrl_wr_snoop.sv | 236 +++ test/tb_ccu_ctrl_wr_snoop_old.sv | 231 +++ test/vip/Python_README.md | 42 + test/vip/SV_README.md | 84 + test/vip/ace/ace_agent.svh | 112 ++ test/vip/ace/ace_beat_types.svh | 87 + test/vip/ace/ace_driver.svh | 236 +++ test/vip/ace/ace_monitor.svh | 85 + test/vip/ace/ace_sequencer.svh | 216 +++ test/vip/ace_test_pkg.sv | 8 + test/vip/cache/cache_beat_types.svh | 47 + test/vip/cache/cache_scoreboard.svh | 575 ++++++ test/vip/cache/cache_sequencer.svh | 151 ++ test/vip/cache/cache_top_agent.svh | 177 ++ test/vip/cache/mem_logger.svh | 74 + test/vip/cache/mem_sequencer.svh | 144 ++ test/vip/cache_test_pkg.sv | 10 + test/vip/python/cache_coherency_test.py | 584 +++++++ test/vip/python/cache_state.py | 319 ++++ test/vip/python/common.py | 116 ++ test/vip/python/memory_state.py | 77 + test/vip/python/transactions.py | 168 ++ test/vip/snoop/snoop_agent.svh | 98 ++ test/vip/snoop/snoop_beat_types.svh | 36 + test/vip/snoop/snoop_driver.svh | 116 ++ test/vip/snoop/snoop_monitor.svh | 50 + test/vip/snoop/snoop_sequencer.svh | 90 + test/vip/snoop_test_pkg.sv | 24 + 72 files changed, 11498 insertions(+), 1520 deletions(-) create mode 100644 include/ace/convert.svh create mode 100644 include/ace/domain.svh create mode 100644 lint/Makefile create mode 100644 lint/verible.waiver create mode 100644 lint/verible_rules.cfg create mode 100644 scripts/snoop_types.do create mode 100644 scripts/tb_ace_ccu_top.do create mode 100644 scripts/tb_ccu_ctrl_r_snoop.do create mode 100644 scripts/tb_ccu_ctrl_wr_snoop.do delete mode 100644 src/ace_ccu_top.sv create mode 100644 src/ace_cut.sv create mode 100644 src/ace_snoop_cut.sv delete mode 
100644 src/ace_trs_dec.sv create mode 100644 src/ccu/ace_ccu_ax_arbiter.sv create mode 100644 src/ccu/ace_ccu_cd_arbiter.sv create mode 100644 src/ccu/ace_ccu_cd_ctrl.sv create mode 100644 src/ccu/ace_ccu_frontend.sv create mode 100644 src/ccu/ace_ccu_pkg.sv create mode 100644 src/ccu/ace_ccu_read.sv create mode 100644 src/ccu/ace_ccu_snoop_pipe.sv create mode 100644 src/ccu/ace_ccu_top.sv create mode 100644 src/ccu/ace_ccu_tracker.sv create mode 100644 src/ccu/ace_ccu_write.sv rename src/{ => ccu/deprecated}/ccu_ctrl.sv (100%) rename src/{ => ccu/deprecated}/ccu_ctrl_decoder.sv (98%) rename src/{ => ccu/deprecated}/ccu_ctrl_memory_unit.sv (100%) rename src/{ => ccu/deprecated}/ccu_ctrl_pkg.sv (100%) rename src/{ => ccu/deprecated}/ccu_ctrl_snoop_unit.sv (100%) create mode 100644 src/deprecated/ace_sim_master.sv rename src/{ => deprecated}/ace_test.sv (90%) rename src/{ => deprecated}/snoop_test.sv (91%) delete mode 100644 src/snoop_pkg.sv create mode 100644 test/tb_ace_ccu_snoop_interconnect.sv create mode 100644 test/tb_ace_ccu_top_old.sv create mode 100644 test/tb_ccu_ctrl_r_snoop.sv create mode 100644 test/tb_ccu_ctrl_wr_snoop.sv create mode 100644 test/tb_ccu_ctrl_wr_snoop_old.sv create mode 100644 test/vip/Python_README.md create mode 100644 test/vip/SV_README.md create mode 100644 test/vip/ace/ace_agent.svh create mode 100644 test/vip/ace/ace_beat_types.svh create mode 100644 test/vip/ace/ace_driver.svh create mode 100644 test/vip/ace/ace_monitor.svh create mode 100644 test/vip/ace/ace_sequencer.svh create mode 100644 test/vip/ace_test_pkg.sv create mode 100644 test/vip/cache/cache_beat_types.svh create mode 100644 test/vip/cache/cache_scoreboard.svh create mode 100644 test/vip/cache/cache_sequencer.svh create mode 100644 test/vip/cache/cache_top_agent.svh create mode 100644 test/vip/cache/mem_logger.svh create mode 100644 test/vip/cache/mem_sequencer.svh create mode 100644 test/vip/cache_test_pkg.sv create mode 100644 test/vip/python/cache_coherency_test.py 
create mode 100644 test/vip/python/cache_state.py create mode 100644 test/vip/python/common.py create mode 100644 test/vip/python/memory_state.py create mode 100644 test/vip/python/transactions.py create mode 100644 test/vip/snoop/snoop_agent.svh create mode 100644 test/vip/snoop/snoop_beat_types.svh create mode 100644 test/vip/snoop/snoop_driver.svh create mode 100644 test/vip/snoop/snoop_monitor.svh create mode 100644 test/vip/snoop/snoop_sequencer.svh create mode 100644 test/vip/snoop_test_pkg.sv diff --git a/Bender.yml b/Bender.yml index b881111..e85fc31 100644 --- a/Bender.yml +++ b/Bender.yml @@ -1,10 +1,13 @@ package: name: ace authors: - # Alphabetically ordered by last name (maintainers first) + - "Aleksi Korsman " + - "Riccardo Tedeschi " dependencies: - axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.0-beta.2 } + axi: { git: "https://github.com/ricted98/axi.git", rev: 1fd96dec948da018e50a9d40bf78f59bb2f6cd19 } + common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.38.0 } + axi_riscv_atomics: { git: "https://github.com/pulp-platform/axi_riscv_atomics.git", version: 0.8.2} export_include_dirs: - include @@ -15,29 +18,48 @@ sources: # levels 1 and 0, etc. Files within a level are ordered alphabetically. 
# Level 0 - src/ace_pkg.sv - - src/snoop_pkg.sv # Level 1 - src/ace_intf.sv - src/snoop_intf.sv - - src/ccu_ctrl_pkg.sv - # Level 2 - - src/ace_trs_dec.sv - - src/ccu_ctrl_decoder.sv - - src/ccu_ctrl_memory_unit.sv - - src/ccu_ctrl_snoop_unit.sv - # Level 3 - - src/ccu_ctrl.sv - # Leval 4 - - src/ace_ccu_top.sv - - - target: simulation - files: - - src/ace_test.sv - - src/snoop_test.sv + # ACE ips + - src/ace_cut.sv + - src/ace_snoop_cut.sv + # CCU src files + - src/ccu/ace_ccu_pkg.sv + - src/ccu/ace_ccu_ax_arbiter.sv + - src/ccu/ace_ccu_cd_arbiter.sv + - src/ccu/ace_ccu_cd_ctrl.sv + - src/ccu/ace_ccu_frontend.sv + - src/ccu/ace_ccu_read.sv + - src/ccu/ace_ccu_snoop_pipe.sv + - src/ccu/ace_ccu_tracker.sv + - src/ccu/ace_ccu_write.sv + - src/ccu/ace_ccu_top.sv + + + + #- target: simulation + # files: + # - src/ace_test.sv + # - src/snoop_test.sv + + #- target: test + # files: + # # Level 0 + # - test/tb_ace_ccu_pkg.sv + # # Level 1 + # - test/tb_ace_ccu_top.sv + + #- target: vscode + # files: + # - src/ccu/ccu_ctrl_wr_snoop.sv - target: test files: # Level 0 - - test/tb_ace_ccu_pkg.sv + - test/vip/ace_test_pkg.sv + - test/vip/snoop_test_pkg.sv # Level 1 + - test/vip/cache_test_pkg.sv + # Level 2 - test/tb_ace_ccu_top.sv diff --git a/Makefile b/Makefile index 8d0f048..3be7a62 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,44 @@ TBS ?= ace_ccu_top \ SIM_TARGETS := $(addsuffix .log,$(addprefix sim-,$(TBS))) +####### Simulation parameters ####### +# Address width +ADDR_WIDTH ?= 32 +# AXI/ACE data width +DATA_WIDTH ?= 64 +# Cache line word width +WORD_WIDTH ?= 64 +# Number of words in a cache line +CACHELINE_WORDS ?= 4 +# Number of ways in the cache model +WAYS ?= 2 +# Number of sets in the cache model +SETS ?= 16 +# Number of cached masters +NMASTERS ?= 4 +# Number of master groups +NGROUPS ?= 2 +# Number of transactions to be generated per master +NTRANSACTIONS ?= 100 +# Location of the generated files +MEM_DIR ?= $(PWD)/build/mem +# Seed for initial state 
generation. If empty, no seed +SEED ?= 10 +# Run coherency check after simulation +CHECK ?= 0 +# Debug mode for coherency checking +DEBUG ?= 1 + +export ADDR_WIDTH +export DATA_WIDTH +export WORD_WIDTH +export CACHELINE_WORDS +export WAYS +export SETS +export NMASTERS +export NGROUPS +export NTRANSACTIONS +export MEM_DIR .SHELL: bash @@ -51,6 +89,24 @@ sim_all: $(SIM_TARGETS) build: mkdir -p $@ +build/mem: build + mkdir -p $@ + +init_mem: build/mem + python3 test/vip/python/cache_coherency_test.py \ + --addr_width ${ADDR_WIDTH} \ + --data_width ${DATA_WIDTH} \ + --word_width ${WORD_WIDTH} \ + --cacheline_words ${CACHELINE_WORDS} \ + --ways ${WAYS} \ + --sets ${SETS} \ + --n_caches ${NMASTERS} \ + --n_transactions ${NTRANSACTIONS} \ + --target_dir $(MEM_DIR) \ + --seed $(SEED) \ + $(if $(filter 1, $(CHECK)),--check) \ + $(if $(filter 1, $(DEBUG)),--debug) + elab.log: Bender.yml | build export SYNOPSYS_DC="$(SYNOPSYS_DC)"; cd build && ../scripts/synth.sh | tee ../$@ diff --git a/README.md b/README.md index e66e687..03832ce 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,56 @@ This repository provides modules to implement cache coherence SoC's. |------------------------------------------------------|--------------------------------------------------------------------------------------------------------------|--------------------------------| | [`ace_ccu_top`](src/ace_ccu_top.sv) | ACE interconnector, broadcasts snooping messages to the cache controllers and AXI transactions to the slave | [Doc](doc/ace_ccu_top.md) | +## Verification + +Generate the initial cache and memory states, as well as the transaction streams, with the following command: + +``` +make init_mem +``` + +You can control simulation parameters, such as the memory and cache sizes and structures, number of caches, and number of transactions, in `Makefile`. 
+ +You can simulate the top level design with +``` +make -B sim-ace_ccu_top.log +``` + +### Coherency check + +To run coherency check, run +``` +make init_mem CHECK=1 +``` +It will generate the initial cache and memory states, and stall until given a prompt. + +Next, open another terminal and simulate the top level design with +``` +make -B sim-ace_ccu_top.log +``` +Once the simulation finishes, press enter on the coherency check prompt. A coherency check will be run. A log file is generated called `cache_python.log`. Search with keyword `ERROR` to find whether coherency was broken during the simulation. When run with `DEBUG=1` (the default option), a pdb session is opened the moment a coherency problem is found. + ## License -The ACE repository is released under Solderpad v0.51 (SHL-0.51) see [LICENSE](LICENSE) \ No newline at end of file +The ACE repository is released under Solderpad v0.51 (SHL-0.51) see [LICENSE](LICENSE) + +## Publication + +If you use ACE/Culsans in your work, you can cite us: + +``` +@article{tedeschi2024culsans, + title={Culsans: An Efficient Snoop-based Coherency Unit + for the CVA6 Open Source RISC-V application processor}, + volume={10}, + number={2}, + journal={WiPiEC Journal - Works in Progress in Embedded Computing Journal}, + author={Tedeschi, Riccardo and Valente, Luca and Ottavi, Gianmarco and + Zelioli, Enrico and Wistoff, Nils and + Giacometti, Massimiliano and Basit Sajjad, Abdul and + Benini, Luca and Rossi, Davide}, + year={2024}, + month={Aug.} +} + +``` diff --git a/include/ace/convert.svh b/include/ace/convert.svh new file mode 100644 index 0000000..0c771cd --- /dev/null +++ b/include/ace/convert.svh @@ -0,0 +1,128 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + + +`ifndef ACE_CONVERT_SVH_ +`define ACE_CONVERT_SVH_ + +`include "axi/assign.svh" + +`define __ACE_TO_AXI_R(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp[1:0]; \ + __opt_as __lhs``__lhs_sep``last = __rhs``__rhs_sep``last; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_ACE_AW(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr; \ + __opt_as __lhs``__lhs_sep``len = __rhs``__rhs_sep``len; \ + __opt_as __lhs``__lhs_sep``size = __rhs``__rhs_sep``size; \ + __opt_as __lhs``__lhs_sep``burst = __rhs``__rhs_sep``burst; \ + __opt_as __lhs``__lhs_sep``lock = __rhs``__rhs_sep``lock; \ + __opt_as __lhs``__lhs_sep``cache = __rhs``__rhs_sep``cache; \ + __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot; \ + __opt_as __lhs``__lhs_sep``qos = __rhs``__rhs_sep``qos; \ + __opt_as __lhs``__lhs_sep``region = __rhs``__rhs_sep``region; \ + __opt_as __lhs``__lhs_sep``atop = __rhs``__rhs_sep``atop; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; \ + __opt_as __lhs``__lhs_sep``snoop = '0; \ + __opt_as __lhs``__lhs_sep``bar = '0; \ + __opt_as __lhs``__lhs_sep``domain = '0; \ + __opt_as __lhs``__lhs_sep``awunique = '0; +`define __AXI_TO_ACE_AR(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = 
__rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr; \ + __opt_as __lhs``__lhs_sep``len = __rhs``__rhs_sep``len; \ + __opt_as __lhs``__lhs_sep``size = __rhs``__rhs_sep``size; \ + __opt_as __lhs``__lhs_sep``burst = __rhs``__rhs_sep``burst; \ + __opt_as __lhs``__lhs_sep``lock = __rhs``__rhs_sep``lock; \ + __opt_as __lhs``__lhs_sep``cache = __rhs``__rhs_sep``cache; \ + __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot; \ + __opt_as __lhs``__lhs_sep``qos = __rhs``__rhs_sep``qos; \ + __opt_as __lhs``__lhs_sep``region = __rhs``__rhs_sep``region; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; \ + __opt_as __lhs``__lhs_sep``snoop = '0; \ + __opt_as __lhs``__lhs_sep``bar = '0; \ + __opt_as __lhs``__lhs_sep``domain = '0; +`define __AXI_TO_ACE_R(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``resp = {2'b00, __rhs``__rhs_sep``resp}; \ + __opt_as __lhs``__lhs_sep``last = __rhs``__rhs_sep``last; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; + +`define ACE_TO_AXI_ASSIGN_R_STRUCT(dst, src) \ + `__ACE_TO_AXI_R(assign, dst, ., src, .) + +`define AXI_TO_ACE_ASSIGN_AW_STRUCT(dst, src) \ + `__AXI_TO_ACE_AW(assign, dst, ., src, .) + +`define AXI_TO_ACE_ASSIGN_AR_STRUCT(dst, src) \ + `__AXI_TO_ACE_AR(assign, dst, ., src, .) + +`define AXI_TO_ACE_ASSIGN_R_STRUCT(dst, src) \ + `__AXI_TO_ACE_R(assign, dst, ., src, .) + +`define ACE_TO_AXI_SET_R_STRUCT(dst, src) \ + `__ACE_TO_AXI_R(, dst, ., src, .) + +`define AXI_TO_ACE_SET_AW_STRUCT(dst, src) \ + `__AXI_TO_ACE_AW(, dst, ., src, .) + +`define AXI_TO_ACE_SET_AR_STRUCT(dst, src) \ + `__AXI_TO_ACE_AR(, dst, ., src, .) + +`define AXI_TO_ACE_SET_R_STRUCT(dst, src) \ + `__AXI_TO_ACE_R(, dst, ., src, .) 
+ + +`define ACE_TO_AXI_ASSIGN_REQ(dst, src) \ + `AXI_ASSIGN_AW_STRUCT(dst.aw, src.aw) \ + `AXI_ASSIGN_AR_STRUCT(dst.ar, src.ar) \ + `AXI_ASSIGN_W_STRUCT(dst.w, src.w) \ + assign dst.aw_valid = src.aw_valid; \ + assign dst.ar_valid = src.ar_valid; \ + assign dst.w_valid = src.w_valid; \ + assign dst.b_ready = src.b_ready; \ + assign dst.r_ready = src.r_ready; + +`define ACE_TO_AXI_ASSIGN_RESP(dst, src) \ + `ACE_TO_AXI_ASSIGN_R_STRUCT(dst.r, src.r) \ + `AXI_ASSIGN_B_STRUCT(dst.b, src.b) \ + assign dst.aw_ready = src.aw_ready; \ + assign dst.ar_ready = src.ar_ready; \ + assign dst.w_ready = src.w_ready; \ + assign dst.b_valid = src.b_valid; \ + assign dst.r_valid = src.r_valid; + +`define AXI_TO_ACE_ASSIGN_REQ(dst, src) \ + `AXI_TO_ACE_ASSIGN_AW_STRUCT(dst.aw, src.aw) \ + `AXI_TO_ACE_ASSIGN_AR_STRUCT(dst.ar, src.ar) \ + `AXI_ASSIGN_W_STRUCT(dst.w, src.w) \ + assign dst.aw_valid = src.aw_valid; \ + assign dst.ar_valid = src.ar_valid; \ + assign dst.w_valid = src.w_valid; \ + assign dst.b_ready = src.b_ready; \ + assign dst.r_ready = src.r_ready; + + +`define AXI_TO_ACE_ASSIGN_RESP(dst, src) \ + `AXI_TO_ACE_ASSIGN_R_STRUCT(dst.r, src.r) \ + `AXI_ASSIGN_B_STRUCT(dst.b, src.b) \ + assign dst.aw_ready = src.aw_ready; \ + assign dst.ar_ready = src.ar_ready; \ + assign dst.w_ready = src.w_ready; \ + assign dst.b_valid = src.b_valid; \ + assign dst.r_valid = src.r_valid; + + +`endif // ACE_CONVERT_SVH_ diff --git a/include/ace/domain.svh b/include/ace/domain.svh new file mode 100644 index 0000000..0b4772b --- /dev/null +++ b/include/ace/domain.svh @@ -0,0 +1,44 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + + +`ifndef ACE_DOMAIN_SVH_ +`define ACE_DOMAIN_SVH_ + + ////////////////// + // Domain types // + ////////////////// + +`define DOMAIN_BV_T(__width) \ + logic [__width-1:0] + +`define DOMAIN_RULE_T(__bv_t) \ + struct packed { \ + __bv_t initiator; \ + __bv_t inner; \ + __bv_t outer; \ + } + +`define DOMAIN_TYPEDEF_BV_T(__width, __bv_t) \ + typedef logic [__width-1:0] __bv_t; + +`define DOMAIN_TYPEDEF_RULE_T(__bv_t, __set_t) \ + typedef struct packed { \ + __bv_t initiator; \ + __bv_t inner; \ + __bv_t outer; \ + } __set_t; + +`define DOMAIN_TYPEDEF_ALL(__width, __bv_t, __set_t) \ + `DOMAIN_TYPEDEF_BV_T(__width, __bv_t) \ + `DOMAIN_TYPEDEF_RULE_T(__bv_t, __set_t) + +`endif // ACE_DOMAIN_SVH_ diff --git a/include/ace/typedef.svh b/include/ace/typedef.svh index 10d95a6..e877835 100644 --- a/include/ace/typedef.svh +++ b/include/ace/typedef.svh @@ -42,8 +42,8 @@ axi_pkg::atop_t atop; \ user_t user; \ ace_pkg::awsnoop_t snoop; \ - ace_pkg::bar_t bar; \ - ace_pkg::domain_t domain; \ + ace_pkg::axbar_t bar; \ + ace_pkg::axdomain_t domain; \ ace_pkg::awunique_t awunique; \ } aw_chan_t; `define ACE_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) \ @@ -60,8 +60,8 @@ axi_pkg::region_t region; \ user_t user; \ ace_pkg::arsnoop_t snoop; \ - ace_pkg::bar_t bar; \ - ace_pkg::domain_t domain; \ + ace_pkg::axbar_t bar; \ + ace_pkg::axdomain_t domain; \ } ar_chan_t; `define ACE_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) \ typedef struct packed { \ @@ -124,8 +124,8 @@ `define SNOOP_TYPEDEF_AC_CHAN_T(ac_chan_t, addr_t) \ typedef struct packed { \ addr_t addr; \ - snoop_pkg::acsnoop_t snoop; \ - snoop_pkg::acprot_t prot; \ + 
ace_pkg::acsnoop_t snoop; \ + ace_pkg::acprot_t prot; \ } ac_chan_t; `define SNOOP_TYPEDEF_CD_CHAN_T(cd_chan_t, data_t) \ typedef struct packed { \ @@ -133,7 +133,7 @@ logic last; \ } cd_chan_t; `define SNOOP_TYPEDEF_CR_CHAN_T(cr_chan_t) \ - typedef snoop_pkg::crresp_t cr_chan_t; + typedef ace_pkg::crresp_t cr_chan_t; `define SNOOP_TYPEDEF_REQ_T(req_t, ac_chan_t) \ typedef struct packed { \ logic ac_valid; \ @@ -156,9 +156,10 @@ // // This defines `snoop_req_t` and `snoop_resp_t` request/response structs as well as `snoop_ac_chan_t`, // `snoop_cd_chan_t` and `snoop_cr_chan_t` channel structs. - `define SNOOP_TYPEDEF_ALL(__name, __addr_t, __data_t) \ - `SNOOP_TYPEDEF_AC_CHAN_T(__name``_aw_chan_t, __addr_t) \ + `define SNOOP_TYPEDEF_ALL(__name, __addr_t, __data_t) \ + `SNOOP_TYPEDEF_AC_CHAN_T(__name``_ac_chan_t, __addr_t) \ `SNOOP_TYPEDEF_CR_CHAN_T(__name``_cr_chan_t) \ + `SNOOP_TYPEDEF_CD_CHAN_T(__name``_cd_chan_t, __data_t) \ `SNOOP_TYPEDEF_REQ_T(__name``_req_t, __name``_ac_chan_t) \ `SNOOP_TYPEDEF_RESP_T(__name``_resp_t, __name``_cd_chan_t, __name``_cr_chan_t) //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/lint/Makefile b/lint/Makefile new file mode 100644 index 0000000..240869c --- /dev/null +++ b/lint/Makefile @@ -0,0 +1,28 @@ +BENDER ?= bender +VERIBLE_FORMAT ?= verible-verilog-format +VERIBLE_LINT ?= verible-verilog-lint +PROJ_ROOT = $(abspath ..) 
+ +format: + $(VERIBLE_FORMAT) \ + --inplace \ + --indentation_spaces=4 \ + --column_limit=100 \ + --assignment_statement_alignment=align \ + --case_items_alignment=align \ + --module_net_variable_alignment=align \ + --port_declarations_alignment=align \ + --named_parameter_alignment=align \ + --named_port_alignment=align \ + --formal_parameters_alignment=align \ + --struct_union_members_alignment=align \ + --compact_indexing_and_selections=true \ + --port_declarations_right_align_packed_dimensions=true \ + --port_declarations_right_align_unpacked_dimensions=true \ + $$($(BENDER) script --no-deps -d $(PROJ_ROOT) flist) + +lint: + $(VERIBLE_LINT) \ + --rules_config verible_rules.cfg \ + --waiver_files verible.waiver \ + $$($(BENDER) script --no-deps -d $(PROJ_ROOT) flist) diff --git a/lint/verible.waiver b/lint/verible.waiver new file mode 100644 index 0000000..97681e3 --- /dev/null +++ b/lint/verible.waiver @@ -0,0 +1,3 @@ +waive --rule=interface-name-style --location=".*ace_intf\.sv" +waive --rule=line-length --location=".*ace_intf\.sv" +waive --rule=interface-name-style --location=".*snoop_intf\.sv" diff --git a/lint/verible_rules.cfg b/lint/verible_rules.cfg new file mode 100644 index 0000000..66353f2 --- /dev/null +++ b/lint/verible_rules.cfg @@ -0,0 +1,5 @@ +line-length=length:100 +parameter-name-style=localparam_style:CamelCase|ALL_CAPS;parameter_style:CamelCase|ALL_CAPS +no-tabs +no-trailing-spaces +signal-name-style diff --git a/scripts/run_vsim.sh b/scripts/run_vsim.sh index 9ed7409..7228e95 100755 --- a/scripts/run_vsim.sh +++ b/scripts/run_vsim.sh @@ -26,9 +26,16 @@ fi # regression-consistent. 
SEEDS=(0) +# $VSIM -do ${ROOT}/scripts/dofile.do -sv_seed $seed "$@" | tee vsim.log 2>&1 +# echo "run -all" | $VSIM -sv_seed $seed "$@" | tee vsim.log 2>&1 call_vsim() { for seed in ${SEEDS[@]}; do - echo "run -all" | $VSIM -sv_seed $seed "$@" | tee vsim.log 2>&1 + #echo "run -all" | $VSIM -sv_seed $seed "$@" | tee vsim.log 2>&1 + if [ -f ${ROOT}/scripts/$1.do ]; then + $VSIM -do ${ROOT}/scripts/$1.do -sv_seed $seed "$@" | tee vsim.log 2>&1 + else + $VSIM -sv_seed $seed "$@" | tee vsim.log 2>&1 + fi grep "Errors: 0," vsim.log done } @@ -40,37 +47,30 @@ exec_test() { fi case "$1" in ace_ccu_top) - for NumMst in 2 4 6; do - for NumSlv in 1; do - for Atop in 0 1 ; do - for Exclusive in 0 1; do - for UniqueIds in 0 1 ; do - call_vsim tb_ace_ccu_top -gTbNumMst=$NumMst -gTbNumSlv=$NumSlv \ - -gTbEnAtop=$Atop -gTbEnExcl=$Exclusive \ - -gTbUniqueIds=$UniqueIds - done - done - done - done - done + call_vsim tb_ace_ccu_top -t 1ns -classdebug -coverage -voptargs="+acc" \ + -gAddrWidth=$ADDR_WIDTH \ + -gDataWidth=$DATA_WIDTH \ + -gWordWidth=$WORD_WIDTH \ + -gCachelineWords=$CACHELINE_WORDS \ + -gWays=$WAYS \ + -gSets=$SETS \ + -gTbNumMst=$NMASTERS \ + -gNoMstGroups=$NGROUPS \ + -gMemDir=$MEM_DIR ;; - ace_ccu_top_sanity) - for NumMst in 2; do - for NumSlv in 1; do - for Atop in 0; do - for Exclusive in 0; do - for UniqueIds in 0; do - call_vsim tb_ace_ccu_top -gTbNumMst=$NumMst -gTbNumSlv=$NumSlv \ - -gTbEnAtop=$Atop -gTbEnExcl=$Exclusive \ - -gTbUniqueIds=$UniqueIds - done - done - done - done - done + ccu_ctrl_r_snoop) + call_vsim tb_ccu_ctrl_r_snoop -t 1ns -coverage -voptargs="+acc" \ + -gAddrWidth=$ADDR_WIDTH \ + -gDataWidth=$DATA_WIDTH \ + -gWordWidth=$WORD_WIDTH \ + -gCachelineWords=$CACHELINE_WORDS \ + -gWays=$WAYS \ + -gSets=$SETS \ + -gTbNumMst=$NMASTERS \ + -gMemDir=$MEM_DIR ;; *) - call_vsim tb_$1 -t 1ns -coverage -voptargs="+acc +cover=bcesfx" + call_vsim tb_$1 -t 1ns -coverage -voptargs="+acc" ;; esac } diff --git a/scripts/snoop_types.do b/scripts/snoop_types.do 
new file mode 100644 index 0000000..50b4363 --- /dev/null +++ b/scripts/snoop_types.do @@ -0,0 +1,23 @@ +radix define WriteSnoop { + 3'b000 "WrNoSnp/WrUnq/Br" + 3'b001 "WriteLineUnique" + 3'b010 "WriteClean" + 3'b011 "WriteBack" + 3'b100 "Evict" + 3'b101 "WriteEvict" +} + +radix define ReadSnoop { + 4'b0000 "RdNoSnp/RdOnce/Br" + 4'b0001 "ReadShared" + 4'b0010 "ReadClean" + 4'b0011 "ReadNotSharedDirty" + 4'b0111 "ReadUnique" + 4'b1011 "CleanUnique" + 4'b1100 "MakeUnique" + 4'b1000 "CleanShared" + 4'b1001 "CleanInvalid" + 4'b1101 "MakeInvalid" + 4'b1110 "DVMComplete" + 4'b1111 "DVMMessage" +} \ No newline at end of file diff --git a/scripts/tb_ace_ccu_top.do b/scripts/tb_ace_ccu_top.do new file mode 100644 index 0000000..df0467c --- /dev/null +++ b/scripts/tb_ace_ccu_top.do @@ -0,0 +1,41 @@ +log -r * + +log -class cache_test_pkg::cache_scoreboard::cache_scoreboard__1 + +do snoop_types.do + +# Figure out number of masters from number of ACE interfaces +set n_masters [llength [find instances sim:/tb_ace_ccu_top/ace_intf*]] + +# number of snoop blocks +set n_snoops [llength [find blocks sim:/tb_ace_ccu_top/ccu/i_ace_ccu_top/i_master_path/gen_snoop*]] + +add wave -divider "Clock and Reset" +add wave sim:/tb_ace_ccu_top/ccu/clk_i +add wave sim:/tb_ace_ccu_top/ccu/rst_ni + +add wave -divider "Towards memory" +add wave sim:/tb_ace_ccu_top/ccu/mst_req +add wave sim:/tb_ace_ccu_top/ccu/mst_resp + +for {set n 0} {$n < $n_masters} {incr n 1} { + add wave -divider "Towards cached master m$n" + add wave sim:/tb_ace_ccu_top/ccu/slv_reqs[$n] + add wave sim:/tb_ace_ccu_top/ccu/slv_resps[$n] + add wave -divider "Towards snooped cache m$n" + add wave sim:/tb_ace_ccu_top/ccu/snoop_reqs[$n] + add wave sim:/tb_ace_ccu_top/ccu/snoop_resps[$n] + + radix signal sim:/tb_ace_ccu_top/ccu/slv_reqs[$n].aw.snoop WriteSnoop + radix signal sim:/tb_ace_ccu_top/ccu/slv_reqs[$n].ar.snoop ReadSnoop +} + +for {set n 0} {$n < $n_snoops} {incr n 1} { + add wave -divider "FSM State $n" + add wave -label 
r_fsm sim:/tb_ace_ccu_top/ccu/i_ace_ccu_top/i_master_path/gen_snoop[$n]/i_snoop_path/i_ccu_ctrl_r_snoop/fsm_state_q + add wave -label wr_fsm sim:/tb_ace_ccu_top/ccu/i_ace_ccu_top/i_master_path/gen_snoop[$n]/i_snoop_path/i_ccu_ctrl_wr_snoop/fsm_state_q +} + +onfinish stop +run -all +view wave \ No newline at end of file diff --git a/scripts/tb_ccu_ctrl_r_snoop.do b/scripts/tb_ccu_ctrl_r_snoop.do new file mode 100644 index 0000000..c5fc86b --- /dev/null +++ b/scripts/tb_ccu_ctrl_r_snoop.do @@ -0,0 +1,26 @@ +configure wave -signalnamewidth 1 + +do snoop_types.do + +add wave -divider "Clock and Reset" +add wave sim:/tb_ccu_ctrl_r_snoop/DUT/clk_i +add wave sim:/tb_ccu_ctrl_r_snoop/DUT/rst_ni +add wave -divider "FSM State" +add wave sim:/tb_ccu_ctrl_r_snoop/DUT/fsm_state_q +add wave -divider "Towards cached master" +add wave sim:/tb_ccu_ctrl_r_snoop/DUT/slv_req_i +add wave sim:/tb_ccu_ctrl_r_snoop/DUT/slv_resp_o +add wave -divider "Towards memory" +add wave sim:/tb_ccu_ctrl_r_snoop/DUT/mst_req_o +add wave sim:/tb_ccu_ctrl_r_snoop/DUT/mst_resp_i +add wave -divider "Towards snooped cache" +add wave sim:/tb_ccu_ctrl_r_snoop/DUT/snoop_req_o +add wave sim:/tb_ccu_ctrl_r_snoop/DUT/snoop_resp_i + +radix signal sim:/tb_ccu_ctrl_r_snoop/DUT/slv_req_i.aw.snoop WriteSnoop +radix signal sim:/tb_ccu_ctrl_r_snoop/DUT/slv_req_i.ar.snoop ReadSnoop + +log -r * +onfinish stop +run -all +view wave \ No newline at end of file diff --git a/scripts/tb_ccu_ctrl_wr_snoop.do b/scripts/tb_ccu_ctrl_wr_snoop.do new file mode 100644 index 0000000..9064f82 --- /dev/null +++ b/scripts/tb_ccu_ctrl_wr_snoop.do @@ -0,0 +1,26 @@ +configure wave -signalnamewidth 1 + +do snoop_types.do + +add wave -divider "Clock and Reset" +add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/clk_i +add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/rst_ni +add wave -divider "FSM State" +add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/fsm_state_q +add wave -divider "Towards cached master" +add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/slv_req_i +add wave 
sim:/tb_ccu_ctrl_wr_snoop/DUT/slv_resp_o +add wave -divider "Towards memory" +add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/mst_req_o +add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/mst_resp_i +add wave -divider "Towards snooped cache" +add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/snoop_req_o +add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/snoop_resp_i + +radix signal sim:/tb_ccu_ctrl_wr_snoop/DUT/slv_req_i.aw.snoop WriteSnoop +radix signal sim:/tb_ccu_ctrl_wr_snoop/DUT/slv_req_i.ar.snoop ReadSnoop + +log -r * +onfinish stop +run -all +view wave diff --git a/src/ace_ccu_top.sv b/src/ace_ccu_top.sv deleted file mode 100644 index 2882f27..0000000 --- a/src/ace_ccu_top.sv +++ /dev/null @@ -1,402 +0,0 @@ -// Copyright (c) 2014-2018 ETH Zurich, University of Bologna -// Copyright (c) 2023 PlanV GmbH -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -// ace_ccu_top: Top level module for closely coupled cache coherency protocol -`include "ace/assign.svh" -`include "ace/typedef.svh" - -module ace_ccu_top - import cf_math_pkg::idx_width; -#( - parameter ace_pkg::ccu_cfg_t Cfg = '0, - parameter bit ATOPs = 1'b1, - parameter type slv_aw_chan_t = logic, - parameter type mst_aw_chan_t = logic, - parameter type mst_stg_aw_chan_t = logic, - parameter type w_chan_t = logic, - parameter type slv_b_chan_t = logic, - parameter type mst_b_chan_t = logic, - parameter type mst_stg_b_chan_t = logic, - parameter type slv_ar_chan_t = logic, - parameter type mst_ar_chan_t = logic, - parameter type mst_stg_ar_chan_t = logic, - parameter type slv_r_chan_t = logic, - parameter type mst_r_chan_t = logic, - parameter type mst_stg_r_chan_t = logic, - parameter type slv_req_t = logic, - parameter type slv_resp_t = logic, - parameter type mst_req_t = logic, - parameter type mst_resp_t = logic, - parameter type mst_stg_req_t = logic, - parameter type mst_stg_resp_t = logic, - parameter type reqs_mux_aw_chan_t= logic, - parameter type reqs_mux_ar_chan_t= logic, - parameter type reqs_mux_w_chan_t = logic, - parameter type reqs_mux_r_chan_t = logic, - parameter type reqs_mux_b_chan_t = logic, - parameter type reqs_mux_req_t = logic, - parameter type reqs_mux_resp_t = logic, - parameter type snoop_ac_t = logic, - parameter type snoop_cr_t = logic, - parameter type snoop_cd_t = logic, - parameter type snoop_req_t = logic, - parameter type snoop_resp_t = logic - -) ( - input logic clk_i, - input logic rst_ni, - input logic test_i, - output logic [Cfg.NoSlvPorts-1:0][7:0] perf_evt_o, - input slv_req_t [Cfg.NoSlvPorts-1:0] slv_ports_req_i, - output slv_resp_t [Cfg.NoSlvPorts-1:0] slv_ports_resp_o, - output snoop_req_t [Cfg.NoSlvPorts-1:0] slv_snp_req_o, - input snoop_resp_t [Cfg.NoSlvPorts-1:0] slv_snp_resp_i, - output mst_req_t mst_ports_req_o, - input mst_resp_t mst_ports_resp_i -); - -// signals from the ace_demuxes -slv_req_t 
[Cfg.NoSlvPorts-1:0] [1:0] slv_reqs; // one for non-shareable and one for shareable req -slv_resp_t [Cfg.NoSlvPorts-1:0] [1:0] slv_resps; -// signals into the ace_muxes -mst_stg_req_t [Cfg.NoSlvPorts:0] mst_reqs; // one extra port for CCU -mst_stg_resp_t [Cfg.NoSlvPorts:0] mst_resps; -mst_stg_req_t [Cfg.NoSlvPorts:0] mst_reqs_tmp; -// signals into the CCU -slv_req_t [Cfg.NoSlvPorts-1:0] ccu_reqs_i; -slv_resp_t [Cfg.NoSlvPorts-1:0] ccu_resps_o; -// signals from the CCU -reqs_mux_req_t ccu_reqs_mux_o; -reqs_mux_resp_t ccu_resps_mux_i; -mst_stg_req_t ccu_reqs_o; -mst_stg_resp_t ccu_resps_i; - -// selection lines for mux and demuxes -logic [Cfg.NoSlvPorts-1:0] slv_aw_select, slv_ar_select; - - -for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_slv_port_demux - - // routing of incoming request through transaction type - ace_trs_dec #( - .slv_ace_req_t ( slv_req_t ) - ) i_ace_trs_dec ( - .slv_reqs_i ( slv_ports_req_i[i] ), - .snoop_aw_trs ( slv_aw_select[i] ), - .snoop_ar_trs ( slv_ar_select[i] ) - ); - - // demux - axi_demux #( - .AxiIdWidth ( Cfg.AxiIdWidthSlvPorts ), // ID Width - .AtopSupport ( ATOPs ), - .aw_chan_t ( slv_aw_chan_t ), // AW Channel Type - .w_chan_t ( w_chan_t ), // W Channel Type - .b_chan_t ( slv_b_chan_t ), // B Channel Type - .ar_chan_t ( slv_ar_chan_t ), // AR Channel Type - .r_chan_t ( slv_r_chan_t ), // R Channel Type - .axi_req_t ( slv_req_t ), - .axi_resp_t ( slv_resp_t ), - .NoMstPorts ( 2 ), // one for CCU module and one for mux - .MaxTrans ( Cfg.MaxMstTrans ), - .AxiLookBits ( Cfg.AxiIdUsedSlvPorts ), - .UniqueIds ( Cfg.UniqueIds ), - //.FallThrough ( Cfg.FallThrough ), - .SpillAw ( 1 ), - .SpillW ( 0 ), - .SpillB ( 0 ), - .SpillAr ( 1 ), - .SpillR ( 0 ) - ) i_axi_demux ( - .clk_i, // Clock - .rst_ni, // Asynchronous reset active low - .test_i, // Testmode enable - .slv_req_i ( slv_ports_req_i[i] ), - .slv_aw_select_i ( slv_aw_select[i] ), - .slv_ar_select_i ( slv_ar_select[i] ), - .slv_resp_o ( slv_ports_resp_o[i] ), - .mst_reqs_o ( 
slv_reqs[i] ), - .mst_resps_i ( slv_resps[i] ) - ); -end - -axi_mux #( - .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts+$clog2(Cfg.NoSlvPorts)+1 ), // ID width of the slave ports - .slv_aw_chan_t ( mst_stg_aw_chan_t ), // AW Channel Type, slave ports - .mst_aw_chan_t ( mst_aw_chan_t ), // AW Channel Type, master port - .w_chan_t ( w_chan_t ), // W Channel Type, all ports - .slv_b_chan_t ( mst_stg_b_chan_t ), // B Channel Type, slave ports - .mst_b_chan_t ( mst_b_chan_t ), // B Channel Type, master port - .slv_ar_chan_t ( mst_stg_ar_chan_t ), // AR Channel Type, slave ports - .mst_ar_chan_t ( mst_ar_chan_t ), // AR Channel Type, master port - .slv_r_chan_t ( mst_stg_r_chan_t ), // R Channel Type, slave ports - .mst_r_chan_t ( mst_r_chan_t ), // R Channel Type, master port - .slv_req_t ( mst_stg_req_t ), - .slv_resp_t ( mst_stg_resp_t ), - .mst_req_t ( mst_req_t ), - .mst_resp_t ( mst_resp_t ), - .NoSlvPorts ( Cfg.NoSlvPorts + 1 ), // Number of Masters for the modules - .MaxWTrans ( Cfg.MaxMstTrans ), - .FallThrough ( Cfg.FallThrough ), - .SpillAw ( '0 ), - .SpillW ( '0 ), - .SpillB ( '0 ), - .SpillAr ( '0 ), - .SpillR ( '0 ) -) i_axi_mux ( - .clk_i, // Clock - .rst_ni, // Asynchronous reset active low - .test_i, // Test Mode enable - .slv_reqs_i ( mst_reqs ), - .slv_resps_o ( mst_resps ), - .mst_req_o ( mst_ports_req_o ), - .mst_resp_i ( mst_ports_resp_i ) -); - - -// connection reqs and resps for non-shareable transactions with axi_mux -for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_non_shared_conn - `ACE_ASSIGN_REQ_STRUCT(mst_reqs_tmp[i], slv_reqs[i][0]) - `ACE_ASSIGN_RESP_STRUCT(slv_resps[i][0], mst_resps[i]) - - always_comb begin - mst_reqs[i] = mst_reqs_tmp[i]; - mst_reqs[i].aw.user[$clog2(Cfg.NoSlvPorts)-1:0] = i[$clog2(Cfg.NoSlvPorts)-1:0]; - mst_reqs[i].ar.user[$clog2(Cfg.NoSlvPorts)-1:0] = i[$clog2(Cfg.NoSlvPorts)-1:0]; - - end -end - -// connect CCU reqs and resps to mux -always_comb begin - mst_reqs[Cfg.NoSlvPorts] = mst_reqs_tmp[Cfg.NoSlvPorts]; - 
mst_reqs[Cfg.NoSlvPorts].aw.user[$clog2(Cfg.NoSlvPorts)-1:0] = mst_reqs_tmp[Cfg.NoSlvPorts].aw.id[Cfg.AxiIdWidthSlvPorts +: $clog2(Cfg.NoSlvPorts)]; - mst_reqs[Cfg.NoSlvPorts].ar.user[$clog2(Cfg.NoSlvPorts)-1:0] = mst_reqs_tmp[Cfg.NoSlvPorts].ar.id[Cfg.AxiIdWidthSlvPorts +: $clog2(Cfg.NoSlvPorts)]; -end -`ACE_ASSIGN_REQ_STRUCT(mst_reqs_tmp[Cfg.NoSlvPorts], ccu_reqs_o) -`ACE_ASSIGN_RESP_STRUCT(ccu_resps_i, mst_resps[Cfg.NoSlvPorts]) - -// connection reqs and resps for shareable transactions with CCU -for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_shared_conn - `ACE_ASSIGN_REQ_STRUCT(ccu_reqs_i[i], slv_reqs[i][1]) - `ACE_ASSIGN_RESP_STRUCT(slv_resps[i][1], ccu_resps_o[i]) -end - - -axi_mux #( - .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts ), // ID width of the slave ports - .slv_aw_chan_t ( slv_aw_chan_t ), // AW Channel Type, slave ports - .mst_aw_chan_t ( reqs_mux_aw_chan_t ), // AW Channel Type, master port - .w_chan_t ( w_chan_t ), // W Channel Type, all ports - .slv_b_chan_t ( slv_b_chan_t ), // B Channel Type, slave ports - .mst_b_chan_t ( reqs_mux_b_chan_t ), // B Channel Type, master port - .slv_ar_chan_t ( slv_ar_chan_t ), // AR Channel Type, slave ports - .mst_ar_chan_t ( reqs_mux_ar_chan_t ), // AR Channel Type, master port - .slv_r_chan_t ( slv_r_chan_t ), // R Channel Type, slave ports - .mst_r_chan_t ( reqs_mux_r_chan_t ), // R Channel Type, master port - .slv_req_t ( slv_req_t ), - .slv_resp_t ( slv_resp_t ), - .mst_req_t ( reqs_mux_req_t ), - .mst_resp_t ( reqs_mux_resp_t ), - .NoSlvPorts ( Cfg.NoSlvPorts ), // Number of Masters for the modules - .MaxWTrans ( Cfg.MaxMstTrans ), - .FallThrough ( Cfg.FallThrough ), - .SpillAw ( '0 ), - .SpillW ( '0 ), - .SpillB ( '0 ), - .SpillAr ( '0 ), - .SpillR ( '0 ) -) i_ace_mux ( - .clk_i, // Clock - .rst_ni, // Asynchronous reset active low - .test_i, // Test Mode enable - .slv_reqs_i ( ccu_reqs_i ), - .slv_resps_o ( ccu_resps_o ), - .mst_req_o ( ccu_reqs_mux_o ), - .mst_resp_i ( ccu_resps_mux_i ) -); - 
-logic [7:0] perf_evt_temp; -for (genvar i = 0; i < Cfg.NoSlvPorts; i++) - assign perf_evt_o[i] = perf_evt_temp; - -ccu_ctrl #( - .DcacheLineWidth ( Cfg.DcacheLineWidth ), - .DcacheIndexWidth( Cfg.DcacheIndexWidth ), - .AxiDataWidth ( Cfg.AxiDataWidth ), - .AxiAddrWidth ( Cfg.AxiAddrWidth ), - .NoMstPorts ( Cfg.NoSlvPorts ), - .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts ), // ID width of the slave ports - .mst_aw_chan_t ( mst_stg_aw_chan_t ), // AW Channel Type, master port - .w_chan_t ( w_chan_t ), // W Channel Type, all ports - .mst_b_chan_t ( mst_stg_b_chan_t ), // B Channel Type, master port - .mst_ar_chan_t ( mst_stg_ar_chan_t ), // AR Channel Type, master port - .mst_r_chan_t ( mst_stg_r_chan_t ), // R Channel Type, master port - .mst_req_t ( mst_stg_req_t ), - .mst_resp_t ( mst_stg_resp_t ), - .slv_aw_chan_t ( reqs_mux_aw_chan_t ), - .slv_b_chan_t ( reqs_mux_b_chan_t ), - .slv_ar_chan_t ( reqs_mux_ar_chan_t ), - .slv_r_chan_t ( reqs_mux_r_chan_t ), - .slv_req_t ( reqs_mux_req_t ), - .slv_resp_t ( reqs_mux_resp_t ), - .snoop_ac_t ( snoop_ac_t ), - .snoop_cr_t ( snoop_cr_t ), - .snoop_cd_t ( snoop_cd_t ), - .snoop_req_t ( snoop_req_t ), - .snoop_resp_t ( snoop_resp_t ) - -) ccu_ctrl_i ( - .clk_i, - .rst_ni, - .perf_evt_o ( perf_evt_temp ), - .ccu_req_i ( ccu_reqs_mux_o ), - .ccu_resp_o ( ccu_resps_mux_i ), - .ccu_req_o ( ccu_reqs_o ), - .ccu_resp_i ( ccu_resps_i ), - .s2m_req_o ( slv_snp_req_o ), - .m2s_resp_i ( slv_snp_resp_i ) -); - -endmodule - - - -module ace_ccu_top_intf - import cf_math_pkg::idx_width; -#( - parameter ace_pkg::ccu_cfg_t Cfg = '0, - parameter bit ATOPS = 1'b1 -) ( - input logic clk_i, - input logic rst_ni, - input logic test_i, - output logic [Cfg.NoSlvPorts-1:0][7:0] perf_evt_o, - SNOOP_BUS.Slave snoop_ports [Cfg.NoSlvPorts-1:0], - ACE_BUS.Slave slv_ports [Cfg.NoSlvPorts-1:0], - AXI_BUS.Master mst_ports -); - - localparam int unsigned AxiIdWidthReqsMux = Cfg.AxiIdWidthSlvPorts +$clog2(Cfg.NoSlvPorts); - localparam int unsigned 
AxiIdWidthMstPortsStage = AxiIdWidthReqsMux + 1; // Add one bit used by the CCU - localparam int unsigned AxiIdWidthMstPorts = AxiIdWidthMstPortsStage + $clog2(Cfg.NoSlvPorts+1); - - typedef logic [AxiIdWidthReqsMux -1:0] id_width_reqs_mux_t; - typedef logic [AxiIdWidthMstPortsStage-1:0] id_mst_stg_t; - typedef logic [AxiIdWidthMstPorts -1:0] id_mst_t; - typedef logic [Cfg.AxiIdWidthSlvPorts -1:0] id_slv_t; - typedef logic [Cfg.AxiAddrWidth -1:0] addr_t; - typedef logic [Cfg.AxiDataWidth -1:0] data_t; - typedef logic [Cfg.AxiDataWidth/8 -1:0] strb_t; - typedef logic [Cfg.AxiUserWidth -1:0] user_t; - - // snoop channel conversion - `ACE_TYPEDEF_AW_CHAN_T(mst_ace_stg_aw_chan_t, addr_t, id_mst_stg_t, user_t) - `ACE_TYPEDEF_AW_CHAN_T(mst_ace_aw_chan_t, addr_t, id_mst_t, user_t) - `ACE_TYPEDEF_AW_CHAN_T(slv_ace_aw_chan_t, addr_t, id_slv_t, user_t) - `ACE_TYPEDEF_AR_CHAN_T(mst_ace_stg_ar_chan_t, addr_t, id_mst_stg_t, user_t) - `ACE_TYPEDEF_AR_CHAN_T(mst_ace_ar_chan_t, addr_t, id_mst_t, user_t) - `ACE_TYPEDEF_AR_CHAN_T(slv_ace_ar_chan_t, addr_t, id_slv_t, user_t) - `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(mst_stg_b_chan_t, id_mst_stg_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, id_mst_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, id_slv_t, user_t) - `ACE_TYPEDEF_R_CHAN_T(mst_ace_stg_r_chan_t, data_t, id_mst_stg_t, user_t) - `ACE_TYPEDEF_R_CHAN_T(mst_ace_r_chan_t, data_t, id_mst_t, user_t) - `ACE_TYPEDEF_R_CHAN_T(slv_ace_r_chan_t, data_t, id_slv_t, user_t) - `ACE_TYPEDEF_REQ_T(mst_ace_stg_req_t, mst_ace_stg_aw_chan_t, w_chan_t, mst_ace_stg_ar_chan_t) - `ACE_TYPEDEF_REQ_T(mst_ace_req_t, mst_ace_aw_chan_t, w_chan_t, mst_ace_ar_chan_t) - `ACE_TYPEDEF_REQ_T(slv_ace_req_t, slv_ace_aw_chan_t, w_chan_t, slv_ace_ar_chan_t) - `ACE_TYPEDEF_RESP_T(mst_ace_stg_resp_t, mst_stg_b_chan_t, mst_ace_stg_r_chan_t) - `ACE_TYPEDEF_RESP_T(mst_ace_resp_t, mst_b_chan_t, mst_ace_r_chan_t) - `ACE_TYPEDEF_RESP_T(slv_ace_resp_t, slv_b_chan_t, 
slv_ace_r_chan_t) - `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) - `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) - `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) - `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) - `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) - - `ACE_TYPEDEF_ALL(reqs_mux, addr_t, id_width_reqs_mux_t, data_t, strb_t, user_t) - - - mst_ace_req_t mst_ace_reqs; - mst_ace_resp_t mst_ace_resps; - slv_ace_req_t [Cfg.NoSlvPorts-1:0] slv_ace_reqs; - slv_ace_resp_t [Cfg.NoSlvPorts-1:0] slv_ace_resps; - snoop_req_t [Cfg.NoSlvPorts-1:0] snoop_reqs; - snoop_resp_t [Cfg.NoSlvPorts-1:0] snoop_resps; - - - /// Assigning ACE request from CCU Mux to slave(RAM) - `AXI_ASSIGN_FROM_REQ(mst_ports, mst_ace_reqs) - /// Assigning AXI response from slave (RAM) to CCU mux which accepts only ACE type response - `ACE_ASSIGN_TO_RESP(mst_ace_resps, mst_ports) - - - for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_assign_slv - `ACE_ASSIGN_TO_REQ(slv_ace_reqs[i], slv_ports[i]) - `ACE_ASSIGN_FROM_RESP(slv_ports[i], slv_ace_resps[i]) - /// Assigning SNOOP request from CCU logic to master - `SNOOP_ASSIGN_FROM_REQ(snoop_ports[i], snoop_reqs[i]) - /// Assigning SNOOP response from master to CCU logic - `SNOOP_ASSIGN_TO_RESP(snoop_resps[i], snoop_ports[i]) - end - - - ace_ccu_top #( - .Cfg ( Cfg ), - .ATOPs ( ATOPS ), - .slv_aw_chan_t ( slv_ace_aw_chan_t ), - .mst_stg_aw_chan_t ( mst_ace_stg_aw_chan_t ), - .mst_aw_chan_t ( mst_ace_aw_chan_t ), - .w_chan_t ( w_chan_t ), - .slv_b_chan_t ( slv_b_chan_t ), - .mst_b_chan_t ( mst_b_chan_t ), - .mst_stg_b_chan_t ( mst_stg_b_chan_t ), - .slv_ar_chan_t ( slv_ace_ar_chan_t ), - .mst_ar_chan_t ( mst_ace_ar_chan_t ), - .mst_stg_ar_chan_t ( mst_ace_stg_ar_chan_t ), - .slv_r_chan_t ( slv_ace_r_chan_t ), - .mst_r_chan_t ( mst_ace_r_chan_t ), - .mst_stg_r_chan_t ( mst_ace_stg_r_chan_t ), - .slv_req_t ( slv_ace_req_t ), - .slv_resp_t ( slv_ace_resp_t ), - .mst_req_t ( mst_ace_req_t ), - .mst_resp_t ( mst_ace_resp_t ), - .mst_stg_req_t ( 
mst_ace_stg_req_t ), - .mst_stg_resp_t ( mst_ace_stg_resp_t ), - .reqs_mux_aw_chan_t ( reqs_mux_aw_chan_t ), - .reqs_mux_ar_chan_t ( reqs_mux_ar_chan_t ), - .reqs_mux_w_chan_t ( reqs_mux_w_chan_t ), - .reqs_mux_r_chan_t ( reqs_mux_r_chan_t ), - .reqs_mux_b_chan_t ( reqs_mux_b_chan_t ), - .reqs_mux_req_t ( reqs_mux_req_t ), - .reqs_mux_resp_t ( reqs_mux_resp_t ), - .snoop_ac_t ( snoop_ac_t ), - .snoop_cr_t ( snoop_cr_t ), - .snoop_cd_t ( snoop_cd_t ), - .snoop_req_t ( snoop_req_t ), - .snoop_resp_t ( snoop_resp_t ) - ) i_ccu_top ( - .clk_i, - .rst_ni, - .test_i, - .perf_evt_o, - .slv_ports_req_i ( slv_ace_reqs ), - .slv_ports_resp_o ( slv_ace_resps ), - .slv_snp_req_o ( snoop_reqs ), - .slv_snp_resp_i ( snoop_resps ), - .mst_ports_req_o ( mst_ace_reqs ), - .mst_ports_resp_i ( mst_ace_resps ) - ); - -endmodule diff --git a/src/ace_cut.sv b/src/ace_cut.sv new file mode 100644 index 0000000..0eca0f2 --- /dev/null +++ b/src/ace_cut.sv @@ -0,0 +1,250 @@ +// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Fabian Schuiki +// - Andreas Kurth +// - Andrea Tedeschi + +/// An ACE4 cut. +/// +/// Breaks all combinatorial paths between its input and output. 
+module ace_cut #( + // bypass enable + parameter bit Bypass = 1'b0, + parameter bit BypassAw = Bypass, + parameter bit BypassW = Bypass, + parameter bit BypassB = Bypass, + parameter bit BypassAr = Bypass, + parameter bit BypassR = Bypass, + parameter bit BypassAck = Bypass, + // ACE channel structs + parameter type aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type b_chan_t = logic, + parameter type ar_chan_t = logic, + parameter type r_chan_t = logic, + // ACE request & response structs + parameter type ace_req_t = logic, + parameter type ace_resp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + // salve port + input ace_req_t slv_req_i, + output ace_resp_t slv_resp_o, + // master port + output ace_req_t mst_req_o, + input ace_resp_t mst_resp_i +); + + // a spill register for each channel + spill_register #( + .T (aw_chan_t), + .Bypass(BypassAw) + ) i_reg_aw ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .valid_i(slv_req_i.aw_valid), + .ready_o(slv_resp_o.aw_ready), + .data_i (slv_req_i.aw), + .valid_o(mst_req_o.aw_valid), + .ready_i(mst_resp_i.aw_ready), + .data_o (mst_req_o.aw) + ); + + spill_register #( + .T (w_chan_t), + .Bypass(BypassW) + ) i_reg_w ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .valid_i(slv_req_i.w_valid), + .ready_o(slv_resp_o.w_ready), + .data_i (slv_req_i.w), + .valid_o(mst_req_o.w_valid), + .ready_i(mst_resp_i.w_ready), + .data_o (mst_req_o.w) + ); + + spill_register #( + .T (b_chan_t), + .Bypass(BypassB) + ) i_reg_b ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .valid_i(mst_resp_i.b_valid), + .ready_o(mst_req_o.b_ready), + .data_i (mst_resp_i.b), + .valid_o(slv_resp_o.b_valid), + .ready_i(slv_req_i.b_ready), + .data_o (slv_resp_o.b) + ); + + spill_register #( + .T (ar_chan_t), + .Bypass(BypassAr) + ) i_reg_ar ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .valid_i(slv_req_i.ar_valid), + .ready_o(slv_resp_o.ar_ready), + .data_i (slv_req_i.ar), + .valid_o(mst_req_o.ar_valid), + .ready_i(mst_resp_i.ar_ready), + .data_o 
(mst_req_o.ar) + ); + + spill_register #( + .T (r_chan_t), + .Bypass(BypassR) + ) i_reg_r ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .valid_i(mst_resp_i.r_valid), + .ready_o(mst_req_o.r_ready), + .data_i (mst_resp_i.r), + .valid_o(slv_resp_o.r_valid), + .ready_i(slv_req_i.r_ready), + .data_o (slv_resp_o.r) + ); + + if (BypassAck) begin : gen_xack_bypass + assign mst_req_o.wack = slv_req_i.wack; + assign mst_req_o.rack = slv_req_i.rack; + end else begin : gen_xack_reg + + logic wack_q; + logic rack_q; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + wack_q <= 1'b0; + rack_q <= 1'b0; + end else begin + wack_q <= slv_req_i.wack; + rack_q <= slv_req_i.rack; + end + end + + assign mst_req_o.wack = wack_q; + assign mst_req_o.rack = rack_q; + end +endmodule + +`include "ace/assign.svh" +`include "ace/typedef.svh" + +// interface wrapper +module ace_cut_intf #( + // Bypass eneable + parameter bit BYPASS = 1'b0, + parameter bit BYPASS_AW = BYPASS, + parameter bit BYPASS_W = BYPASS, + parameter bit BYPASS_B = BYPASS, + parameter bit BYPASS_AR = BYPASS, + parameter bit BYPASS_R = BYPASS, + parameter bit BYPASS_ACK = BYPASS, + // The address width. + parameter int unsigned ADDR_WIDTH = 0, + // The data width. + parameter int unsigned DATA_WIDTH = 0, + // The ID width. + parameter int unsigned ID_WIDTH = 0, + // The user data width. 
+ parameter int unsigned USER_WIDTH = 0 +) ( + input logic clk_i, + input logic rst_ni, + ACE_BUS.Slave in, + ACE_BUS.Master out +); + + typedef logic [ID_WIDTH-1:0] id_t; + typedef logic [ADDR_WIDTH-1:0] addr_t; + typedef logic [DATA_WIDTH-1:0] data_t; + typedef logic [DATA_WIDTH/8-1:0] strb_t; + typedef logic [USER_WIDTH-1:0] user_t; + + `ACE_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) + `ACE_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) + `ACE_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) + `ACE_TYPEDEF_REQ_T(ace_req_t, aw_chan_t, w_chan_t, ar_chan_t) + `ACE_TYPEDEF_RESP_T(ace_resp_t, b_chan_t, r_chan_t) + + ace_req_t slv_req, mst_req; + ace_resp_t slv_resp, mst_resp; + + `ACE_ASSIGN_TO_REQ(slv_req, in) + `ACE_ASSIGN_FROM_RESP(in, slv_resp) + + `ACE_ASSIGN_FROM_REQ(out, mst_req) + `ACE_ASSIGN_TO_RESP(mst_resp, out) + + ace_cut #( + .Bypass (BYPASS), + .BypassAw (BYPASS_AW), + .BypassW (BYPASS_W), + .BypassB (BYPASS_B), + .BypassAr (BYPASS_AR), + .BypassR (BYPASS_R), + .BypassAck (BYPASS_ACK), + .aw_chan_t (aw_chan_t), + .w_chan_t (w_chan_t), + .b_chan_t (b_chan_t), + .ar_chan_t (ar_chan_t), + .r_chan_t (r_chan_t), + .ace_req_t (ace_req_t), + .ace_resp_t(ace_resp_t) + ) i_ace_cut ( + .clk_i, + .rst_ni, + .slv_req_i (slv_req), + .slv_resp_o(slv_resp), + .mst_req_o (mst_req), + .mst_resp_i(mst_resp) + ); + + // Check the invariants. 
+ // pragma translate_off +`ifndef VERILATOR + initial begin + assert (ADDR_WIDTH > 0) + else $fatal(1, "Wrong addr width parameter"); + assert (DATA_WIDTH > 0) + else $fatal(1, "Wrong data width parameter"); + assert (ID_WIDTH > 0) + else $fatal(1, "Wrong id width parameter"); + assert (USER_WIDTH > 0) + else $fatal(1, "Wrong user width parameter"); + assert (in.AXI_ADDR_WIDTH == ADDR_WIDTH) + else $fatal(1, "Wrong interface definition"); + assert (in.AXI_DATA_WIDTH == DATA_WIDTH) + else $fatal(1, "Wrong interface definition"); + assert (in.AXI_ID_WIDTH == ID_WIDTH) + else $fatal(1, "Wrong interface definition"); + assert (in.AXI_USER_WIDTH == USER_WIDTH) + else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) + else $fatal(1, "Wrong interface definition"); + assert (out.AXI_DATA_WIDTH == DATA_WIDTH) + else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ID_WIDTH == ID_WIDTH) + else $fatal(1, "Wrong interface definition"); + assert (out.AXI_USER_WIDTH == USER_WIDTH) + else $fatal(1, "Wrong interface definition"); + end +`endif + // pragma translate_on +endmodule diff --git a/src/ace_intf.sv b/src/ace_intf.sv index 55abbaa..68ed0b3 100644 --- a/src/ace_intf.sv +++ b/src/ace_intf.sv @@ -10,266 +10,290 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. 
+interface CLK_IF ( + input clk_i +); +endinterface // ACE bus interafces interface ACE_BUS #( - parameter int unsigned AXI_ADDR_WIDTH = 0, - parameter int unsigned AXI_DATA_WIDTH = 0, - parameter int unsigned AXI_ID_WIDTH = 0, - parameter int unsigned AXI_USER_WIDTH = 0 + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0 ); - localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; + localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; + + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_STRB_WIDTH-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; - typedef logic [AXI_ID_WIDTH-1:0] id_t; - typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; - typedef logic [AXI_DATA_WIDTH-1:0] data_t; - typedef logic [AXI_STRB_WIDTH-1:0] strb_t; - typedef logic [AXI_USER_WIDTH-1:0] user_t; + id_t aw_id; + addr_t aw_addr; + axi_pkg::len_t aw_len; + axi_pkg::size_t aw_size; + axi_pkg::burst_t aw_burst; + logic aw_lock; + axi_pkg::cache_t aw_cache; + axi_pkg::prot_t aw_prot; + axi_pkg::qos_t aw_qos; + axi_pkg::region_t aw_region; + axi_pkg::atop_t aw_atop; + user_t aw_user; + logic aw_valid; + logic aw_ready; + ace_pkg::awsnoop_t aw_snoop; + ace_pkg::axbar_t aw_bar; + ace_pkg::axdomain_t aw_domain; + ace_pkg::awunique_t aw_awunique; - id_t aw_id; - addr_t aw_addr; - axi_pkg::len_t aw_len; - axi_pkg::size_t aw_size; - axi_pkg::burst_t aw_burst; - logic aw_lock; - axi_pkg::cache_t aw_cache; - axi_pkg::prot_t aw_prot; - axi_pkg::qos_t aw_qos; - axi_pkg::region_t aw_region; - axi_pkg::atop_t aw_atop; - user_t aw_user; - logic aw_valid; - logic aw_ready; - ace_pkg::awsnoop_t aw_snoop; - ace_pkg::bar_t aw_bar; - ace_pkg::domain_t aw_domain; - ace_pkg::awunique_t aw_awunique; + data_t w_data; + strb_t w_strb; + logic w_last; + user_t w_user; + logic w_valid; 
+ logic w_ready; - data_t w_data; - strb_t w_strb; - logic w_last; - user_t w_user; - logic w_valid; - logic w_ready; + id_t b_id; + axi_pkg::resp_t b_resp; + user_t b_user; + logic b_valid; + logic b_ready; - id_t b_id; - axi_pkg::resp_t b_resp; - user_t b_user; - logic b_valid; - logic b_ready; + id_t ar_id; + addr_t ar_addr; + axi_pkg::len_t ar_len; + axi_pkg::size_t ar_size; + axi_pkg::burst_t ar_burst; + logic ar_lock; + axi_pkg::cache_t ar_cache; + axi_pkg::prot_t ar_prot; + axi_pkg::qos_t ar_qos; + axi_pkg::region_t ar_region; + user_t ar_user; + logic ar_valid; + logic ar_ready; + ace_pkg::arsnoop_t ar_snoop; + ace_pkg::axbar_t ar_bar; + ace_pkg::axdomain_t ar_domain; - id_t ar_id; - addr_t ar_addr; - axi_pkg::len_t ar_len; - axi_pkg::size_t ar_size; - axi_pkg::burst_t ar_burst; - logic ar_lock; - axi_pkg::cache_t ar_cache; - axi_pkg::prot_t ar_prot; - axi_pkg::qos_t ar_qos; - axi_pkg::region_t ar_region; - user_t ar_user; - logic ar_valid; - logic ar_ready; - ace_pkg::arsnoop_t ar_snoop; - ace_pkg::bar_t ar_bar; - ace_pkg::domain_t ar_domain; - - id_t r_id; - data_t r_data; - ace_pkg::rresp_t r_resp; - logic r_last; - user_t r_user; - logic r_valid; - logic r_ready; + id_t r_id; + data_t r_data; + ace_pkg::rresp_t r_resp; + logic r_last; + user_t r_user; + logic r_valid; + logic r_ready; - logic wack; - logic rack; + logic wack; + logic rack; - modport Master ( - output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, input aw_ready, - output w_data, w_strb, w_last, w_user, w_valid, input w_ready, - input b_id, b_resp, b_user, b_valid, output b_ready, - output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar, ar_domain, input ar_ready, - input r_id, r_data, r_resp, r_last, r_user, r_valid, output r_ready, - output wack, rack - ); + modport Master( + output aw_id, aw_addr, 
aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, + input aw_ready, + output w_data, w_strb, w_last, w_user, w_valid, + input w_ready, + input b_id, b_resp, b_user, b_valid, + output b_ready, + output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar, ar_domain, + input ar_ready, + input r_id, r_data, r_resp, r_last, r_user, r_valid, + output r_ready, + output wack, rack + ); - modport Slave ( - input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, output aw_ready, - input w_data, w_strb, w_last, w_user, w_valid, output w_ready, - output b_id, b_resp, b_user, b_valid, input b_ready, - input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar, ar_domain, output ar_ready, - output r_id, r_data, r_resp, r_last, r_user, r_valid, input r_ready, - input wack, rack - ); + modport Slave( + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, + output aw_ready, + input w_data, w_strb, w_last, w_user, w_valid, + output w_ready, + output b_id, b_resp, b_user, b_valid, + input b_ready, + input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar, ar_domain, + output ar_ready, + output r_id, r_data, r_resp, r_last, r_user, r_valid, + input r_ready, + input wack, rack + ); - modport Monitor ( - input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_ready, aw_snoop, aw_bar, aw_domain, aw_awunique, + modport Monitor( + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, 
aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_ready, aw_snoop, aw_bar, aw_domain, aw_awunique, w_data, w_strb, w_last, w_user, w_valid, w_ready, b_id, b_resp, b_user, b_valid, b_ready, ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_ready, ar_snoop, ar_bar, ar_domain, r_id, r_data, r_resp, r_last, r_user, r_valid, r_ready, wack, rack - ); + ); endinterface /// A clocked ACE interface for use in design verification. interface ACE_BUS_DV #( - parameter int unsigned AXI_ADDR_WIDTH = 0, - parameter int unsigned AXI_DATA_WIDTH = 0, - parameter int unsigned AXI_ID_WIDTH = 0, - parameter int unsigned AXI_USER_WIDTH = 0 -)( - input logic clk_i + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0 +) ( + input logic clk_i ); - localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; + localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; - typedef logic [AXI_ID_WIDTH-1:0] id_t; - typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; - typedef logic [AXI_DATA_WIDTH-1:0] data_t; - typedef logic [AXI_STRB_WIDTH-1:0] strb_t; - typedef logic [AXI_USER_WIDTH-1:0] user_t; + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_STRB_WIDTH-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; - id_t aw_id; - addr_t aw_addr; - axi_pkg::len_t aw_len; - axi_pkg::size_t aw_size; - axi_pkg::burst_t aw_burst; - logic aw_lock; - axi_pkg::cache_t aw_cache; - axi_pkg::prot_t aw_prot; - axi_pkg::qos_t aw_qos; - axi_pkg::region_t aw_region; - axi_pkg::atop_t aw_atop; - user_t aw_user; - logic aw_valid; - logic aw_ready; - ace_pkg::awsnoop_t aw_snoop; - ace_pkg::bar_t aw_bar; - ace_pkg::domain_t aw_domain; - ace_pkg::awunique_t aw_awunique; + id_t aw_id; + addr_t aw_addr; + axi_pkg::len_t 
aw_len; + axi_pkg::size_t aw_size; + axi_pkg::burst_t aw_burst; + logic aw_lock; + axi_pkg::cache_t aw_cache; + axi_pkg::prot_t aw_prot; + axi_pkg::qos_t aw_qos; + axi_pkg::region_t aw_region; + axi_pkg::atop_t aw_atop; + user_t aw_user; + logic aw_valid; + logic aw_ready; + ace_pkg::awsnoop_t aw_snoop; + ace_pkg::axbar_t aw_bar; + ace_pkg::axdomain_t aw_domain; + ace_pkg::awunique_t aw_awunique; - data_t w_data; - strb_t w_strb; - logic w_last; - user_t w_user; - logic w_valid; - logic w_ready; + data_t w_data; + strb_t w_strb; + logic w_last; + user_t w_user; + logic w_valid; + logic w_ready; - id_t b_id; - axi_pkg::resp_t b_resp; - user_t b_user; - logic b_valid; - logic b_ready; + id_t b_id; + axi_pkg::resp_t b_resp; + user_t b_user; + logic b_valid; + logic b_ready; - id_t ar_id; - addr_t ar_addr; - axi_pkg::len_t ar_len; - axi_pkg::size_t ar_size; - axi_pkg::burst_t ar_burst; - logic ar_lock; - axi_pkg::cache_t ar_cache; - axi_pkg::prot_t ar_prot; - axi_pkg::qos_t ar_qos; - axi_pkg::region_t ar_region; - user_t ar_user; - logic ar_valid; - logic ar_ready; - ace_pkg::arsnoop_t ar_snoop; - ace_pkg::bar_t ar_bar; - ace_pkg::domain_t ar_domain; + id_t ar_id; + addr_t ar_addr; + axi_pkg::len_t ar_len; + axi_pkg::size_t ar_size; + axi_pkg::burst_t ar_burst; + logic ar_lock; + axi_pkg::cache_t ar_cache; + axi_pkg::prot_t ar_prot; + axi_pkg::qos_t ar_qos; + axi_pkg::region_t ar_region; + user_t ar_user; + logic ar_valid; + logic ar_ready; + ace_pkg::arsnoop_t ar_snoop; + ace_pkg::axbar_t ar_bar; + ace_pkg::axdomain_t ar_domain; - id_t r_id; - data_t r_data; - ace_pkg::rresp_t r_resp; - logic r_last; - user_t r_user; - logic r_valid; - logic r_ready; + id_t r_id; + data_t r_data; + ace_pkg::rresp_t r_resp; + logic r_last; + user_t r_user; + logic r_valid; + logic r_ready; - logic wack; - logic rack; + logic wack; + logic rack; - modport Master ( - output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, 
aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, input aw_ready, - output w_data, w_strb, w_last, w_user, w_valid, input w_ready, - input b_id, b_resp, b_user, b_valid, output b_ready, - output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar,ar_domain, input ar_ready, - input r_id, r_data, r_resp, r_last, r_user, r_valid, output r_ready, - output wack, rack - ); + modport Master( + output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, + input aw_ready, + output w_data, w_strb, w_last, w_user, w_valid, + input w_ready, + input b_id, b_resp, b_user, b_valid, + output b_ready, + output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar,ar_domain, + input ar_ready, + input r_id, r_data, r_resp, r_last, r_user, r_valid, + output r_ready, + output wack, rack + ); - modport Slave ( - input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, output aw_ready, - input w_data, w_strb, w_last, w_user, w_valid, output w_ready, - output b_id, b_resp, b_user, b_valid, input b_ready, - input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar, ar_domain, output ar_ready, - output r_id, r_data, r_resp, r_last, r_user, r_valid, input r_ready, - input wack, rack - ); + modport Slave( + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, + output aw_ready, + input w_data, w_strb, w_last, w_user, w_valid, + output w_ready, + output b_id, b_resp, b_user, b_valid, + input b_ready, + input ar_id, ar_addr, ar_len, ar_size, ar_burst, 
ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar, ar_domain, + output ar_ready, + output r_id, r_data, r_resp, r_last, r_user, r_valid, + input r_ready, + input wack, rack + ); - modport Monitor ( - input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_ready, aw_snoop, aw_bar, aw_domain, aw_awunique, + modport Monitor( + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_ready, aw_snoop, aw_bar, aw_domain, aw_awunique, w_data, w_strb, w_last, w_user, w_valid, w_ready, b_id, b_resp, b_user, b_valid, b_ready, ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_ready, ar_snoop, ar_bar, ar_domain, r_id, r_data, r_resp, r_last, r_user, r_valid, r_ready, wack, rack - ); + ); - // pragma translate_off - `ifndef VERILATOR - // Single-Channel Assertions: Signals including valid must not change between valid and handshake. 
- // AW - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_id))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_addr))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_len))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_size))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_burst))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_lock))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_cache))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_prot))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_qos))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_region))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_atop))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_user))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> aw_valid)); - // W - assert property (@(posedge clk_i) ( w_valid && ! w_ready |=> $stable(w_data))); - assert property (@(posedge clk_i) ( w_valid && ! w_ready |=> $stable(w_strb))); - assert property (@(posedge clk_i) ( w_valid && ! w_ready |=> $stable(w_last))); - assert property (@(posedge clk_i) ( w_valid && ! w_ready |=> $stable(w_user))); - assert property (@(posedge clk_i) ( w_valid && ! w_ready |=> w_valid)); - // B - assert property (@(posedge clk_i) ( b_valid && ! b_ready |=> $stable(b_id))); - assert property (@(posedge clk_i) ( b_valid && ! b_ready |=> $stable(b_resp))); - assert property (@(posedge clk_i) ( b_valid && ! b_ready |=> $stable(b_user))); - assert property (@(posedge clk_i) ( b_valid && ! 
b_ready |=> b_valid)); - // AR - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_id))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_addr))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_len))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_size))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_burst))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_lock))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_cache))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_prot))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_qos))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_region))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_user))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> ar_valid)); - // R - assert property (@(posedge clk_i) ( r_valid && ! r_ready |=> $stable(r_id))); - assert property (@(posedge clk_i) ( r_valid && ! r_ready |=> $stable(r_data))); - assert property (@(posedge clk_i) ( r_valid && ! r_ready |=> $stable(r_resp))); - assert property (@(posedge clk_i) ( r_valid && ! r_ready |=> $stable(r_last))); - assert property (@(posedge clk_i) ( r_valid && ! r_ready |=> $stable(r_user))); - assert property (@(posedge clk_i) ( r_valid && ! r_ready |=> r_valid)); - `endif - // pragma translate_on + // pragma translate_off +`ifndef VERILATOR + // Single-Channel Assertions: Signals including valid must not change between valid and handshake. 
+ // AW + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_id))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_addr))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_len))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_size))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_burst))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_lock))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_cache))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_prot))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_qos))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_region))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_atop))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_user))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> aw_valid)); + // W + assert property (@(posedge clk_i) (w_valid && !w_ready |=> $stable(w_data))); + assert property (@(posedge clk_i) (w_valid && !w_ready |=> $stable(w_strb))); + assert property (@(posedge clk_i) (w_valid && !w_ready |=> $stable(w_last))); + assert property (@(posedge clk_i) (w_valid && !w_ready |=> $stable(w_user))); + assert property (@(posedge clk_i) (w_valid && !w_ready |=> w_valid)); + // B + assert property (@(posedge clk_i) (b_valid && !b_ready |=> $stable(b_id))); + assert property (@(posedge clk_i) (b_valid && !b_ready |=> $stable(b_resp))); + assert property (@(posedge clk_i) (b_valid && !b_ready |=> $stable(b_user))); + assert property (@(posedge clk_i) (b_valid && !b_ready |=> b_valid)); + // AR + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_id))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_addr))); + assert property (@(posedge 
clk_i) (ar_valid && !ar_ready |=> $stable(ar_len))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_size))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_burst))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_lock))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_cache))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_prot))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_qos))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_region))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_user))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> ar_valid)); + // R + assert property (@(posedge clk_i) (r_valid && !r_ready |=> $stable(r_id))); + assert property (@(posedge clk_i) (r_valid && !r_ready |=> $stable(r_data))); + assert property (@(posedge clk_i) (r_valid && !r_ready |=> $stable(r_resp))); + assert property (@(posedge clk_i) (r_valid && !r_ready |=> $stable(r_last))); + assert property (@(posedge clk_i) (r_valid && !r_ready |=> $stable(r_user))); + assert property (@(posedge clk_i) (r_valid && !r_ready |=> r_valid)); +`endif + // pragma translate_on endinterface diff --git a/src/ace_pkg.sv b/src/ace_pkg.sv index c9234c5..fd45f50 100644 --- a/src/ace_pkg.sv +++ b/src/ace_pkg.sv @@ -15,72 +15,363 @@ /// Contains all necessary type definitions, constants, and generally useful functions. package ace_pkg; - // Support for snoop channels - typedef logic [3:0] arsnoop_t; - typedef logic [2:0] awsnoop_t; - typedef logic [1:0] bar_t; - typedef logic [1:0] domain_t; - typedef logic [0:0] awunique_t; - typedef logic [3:0] rresp_t; - - /// Slice on Demux AW channel. - localparam logic [9:0] DemuxAw = (1 << 9); - /// Slice on Demux W channel. - localparam logic [9:0] DemuxW = (1 << 8); - /// Slice on Demux B channel. 
- localparam logic [9:0] DemuxB = (1 << 7); - /// Slice on Demux AR channel. - localparam logic [9:0] DemuxAr = (1 << 6); - /// Slice on Demux R channel. - localparam logic [9:0] DemuxR = (1 << 5); - /// Slice on Mux AW channel. - localparam logic [9:0] MuxAw = (1 << 4); - /// Slice on Mux W channel. - localparam logic [9:0] MuxW = (1 << 3); - /// Slice on Mux B channel. - localparam logic [9:0] MuxB = (1 << 2); - /// Slice on Mux AR channel. - localparam logic [9:0] MuxAr = (1 << 1); - /// Slice on Mux R channel. - localparam logic [9:0] MuxR = (1 << 0); - /// Latency configuration for `ace_xbar`. - typedef enum logic [9:0] { - NO_LATENCY = 10'b000_00_000_00, - CUT_SLV_AX = DemuxAw | DemuxAr, - CUT_MST_AX = MuxAw | MuxAr, - CUT_ALL_AX = DemuxAw | DemuxAr | MuxAw | MuxAr, - CUT_SLV_PORTS = DemuxAw | DemuxW | DemuxB | DemuxAr | DemuxR, - CUT_MST_PORTS = MuxAw | MuxW | MuxB | MuxAr | MuxR, - CUT_ALL_PORTS = 10'b111_11_111_11 - } ccu_latency_e; - - /// Configuration for `ace_ccu`. - typedef struct packed { - int unsigned NoSlvPorts; - int unsigned MaxMstTrans; - int unsigned MaxSlvTrans; - bit FallThrough; - ccu_latency_e LatencyMode; - int unsigned AxiIdWidthSlvPorts; - int unsigned AxiIdUsedSlvPorts; - bit UniqueIds; - int unsigned AxiAddrWidth; - int unsigned AxiDataWidth; - int unsigned AxiUserWidth; - int unsigned DcacheLineWidth; - int unsigned DcacheIndexWidth; - } ccu_cfg_t; - - // transaction type - typedef enum logic[2:0] { - READ_NO_SNOOP, - READ_ONCE, - READ_SHARED, - READ_UNIQUE, - CLEAN_UNIQUE, - WRITE_NO_SNOOP, - WRITE_BACK, - WRITE_UNIQUE - } ace_trs_t; + ////////////// + // Typedefs // + ////////////// + + // Additional types for already existing AXI channels + typedef logic [3:0] arsnoop_t; + typedef logic [2:0] awsnoop_t; + typedef logic [1:0] axbar_t; + typedef logic [1:0] axdomain_t; + typedef logic [3:0] rresp_t; + typedef logic [0:0] awunique_t; + + // Snoop related types + typedef logic [3:0] acsnoop_t; + typedef logic [2:0] acprot_t; + + 
typedef struct packed { + logic WasUnique; + logic IsShared; + logic PassDirty; + logic Error; + logic DataTransfer; + } crresp_t; + + typedef struct packed { + acsnoop_t snoop_trs; + logic accepts_dirty; + logic accepts_dirty_shared; + logic accepts_shared; + } snoop_info_t; + + /////////////// + // Encodings // + /////////////// + + // AxDOMAIN + localparam axdomain_t NonShareable = 2'b00; + localparam axdomain_t InnerShareable = 2'b01; + localparam axdomain_t OuterShareable = 2'b10; + localparam axdomain_t System = 2'b11; + + + // AxBAR + localparam axbar_t NormalAccessRespectingBarriers = 2'b00; + localparam axbar_t MemoryBarrier = 2'b01; + localparam axbar_t NormalAccessIgnoringBarriers = 2'b10; + localparam axbar_t SynchronizationBarrier = 2'b11; + + // Uniquely defined here both for ARSNOOP and AWSNOOP + localparam int unsigned Barrier = 0; + + // ARSNOOP + localparam arsnoop_t ReadNoSnoop = 4'b0000; + localparam arsnoop_t ReadOnce = 4'b0000; + localparam arsnoop_t ReadShared = 4'b0001; + localparam arsnoop_t ReadClean = 4'b0010; + localparam arsnoop_t ReadNotSharedDirty = 4'b0011; + localparam arsnoop_t ReadUnique = 4'b0111; + localparam arsnoop_t CleanUnique = 4'b1011; + localparam arsnoop_t MakeUnique = 4'b1100; + localparam arsnoop_t CleanShared = 4'b1000; + localparam arsnoop_t CleanInvalid = 4'b1001; + localparam arsnoop_t MakeInvalid = 4'b1101; + localparam arsnoop_t DVMComplete = 4'b1110; + localparam arsnoop_t DVMMessage = 4'b1111; + /* Barrier is already defined */ + + // AWSNOOP + localparam awsnoop_t WriteNoSnoop = 3'b000; + localparam awsnoop_t WriteUnique = 3'b000; + localparam awsnoop_t WriteLineUnique = 3'b001; + localparam awsnoop_t WriteClean = 3'b010; + localparam awsnoop_t WriteBack = 3'b011; + localparam awsnoop_t Evict = 3'b100; + localparam awsnoop_t WriteEvict = 3'b101; + /* Barrier is already defined */ + + // ACSNOOP + // + // The encoding is shared with ARSNOOP transactions for the following cases: + // - ReadOnce + // - ReadShared 
+ // - ReadClean + // - ReadNotSharedDirty + // - ReadUnique + // - CleanShared + // - CleanInvalid + // - MakeInvalid + // - DVMComplete + // - DVMMessage + // Cast the parameters to acsnoop_t for consistency (but works anyway) + + /////////////// + // Functions // + /////////////// + + // AWSNOOP decoding + function automatic logic is_write_no_snoop(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {NonShareable, System} && + awsnoop == WriteNoSnoop + ); + endfunction + + function automatic logic is_write_unique(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {InnerShareable, OuterShareable} && + awsnoop == WriteUnique + ); + endfunction + + function automatic logic is_write_line_unique(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {InnerShareable, OuterShareable} && + awsnoop == WriteLineUnique + ); + endfunction + + function automatic logic is_write_clean(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {NonShareable, InnerShareable, OuterShareable} && + awsnoop == WriteClean + ); + endfunction + + function automatic logic is_write_back(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {NonShareable, InnerShareable, OuterShareable} && + awsnoop == WriteBack + ); + endfunction + + function automatic logic is_evict(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {InnerShareable, OuterShareable} && + awsnoop == Evict + ); + endfunction + + function automatic logic is_write_evict(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {NonShareable, InnerShareable, OuterShareable} && + awsnoop == WriteEvict + ); + endfunction + + // ARSNOOP decoding + function automatic logic 
is_read_no_snoop(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + return (arbar0 == 1'b0 && ardomain inside {NonShareable, System} && arsnoop == ReadNoSnoop); + endfunction + + function automatic logic is_read_once(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + return (arbar0 == 1'b0 && ardomain inside {InnerShareable, OuterShareable} && arsnoop == ReadOnce); // ARSNOOP 0000 is ReadOnce only in a shareable domain; otherwise it is ReadNoSnoop + endfunction + + function automatic logic is_read_shared(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {InnerShareable, OuterShareable} && + arsnoop == ReadShared + ); + endfunction + + function automatic logic is_read_clean(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {InnerShareable, OuterShareable} && + arsnoop == ReadClean + ); + endfunction + + function automatic logic is_read_not_shared_dirty(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {InnerShareable, OuterShareable} && + arsnoop == ReadNotSharedDirty + ); + endfunction + + function automatic logic is_read_unique(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {InnerShareable, OuterShareable} && + arsnoop == ReadUnique + ); + endfunction + + function automatic logic is_clean_unique(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {InnerShareable, OuterShareable} && + arsnoop == CleanUnique + ); + endfunction + + function automatic logic is_make_unique(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {InnerShareable, OuterShareable} && + arsnoop == MakeUnique + ); + endfunction + + function automatic logic is_clean_shared(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {NonShareable, InnerShareable, OuterShareable} && + arsnoop == CleanShared + ); +
endfunction + + function automatic logic is_clean_invalid(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {NonShareable, InnerShareable, OuterShareable} && + arsnoop == CleanInvalid + ); + endfunction + + function automatic logic is_make_invalid(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {NonShareable, InnerShareable, OuterShareable} && + arsnoop == MakeInvalid + ); + endfunction + + // Transaction groups + + function automatic logic aw_is_coherent(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); + logic retval; + unique case (1'b1) + is_write_unique(awbar0, awdomain, awsnoop): retval = 1'b1; + is_write_line_unique(awbar0, awdomain, awsnoop): retval = 1'b1; + default: retval = 1'b0; + endcase + return retval; + endfunction + + function automatic logic aw_is_memory_update(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + logic retval; + unique case (1'b1) + is_write_clean(awbar0, awdomain, awsnoop): retval = 1'b1; + is_write_back(awbar0, awdomain, awsnoop): retval = 1'b1; + is_evict(awbar0, awdomain, awsnoop): retval = 1'b1; + is_write_evict(awbar0, awdomain, awsnoop): retval = 1'b1; + default: retval = 1'b0; + endcase + return retval; + endfunction + + function automatic logic aw_is_non_blocking(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + logic retval; + unique case (1'b1) + aw_is_memory_update(awbar0, awdomain, awsnoop): retval = 1'b1; + is_write_no_snoop(awbar0, awdomain, awsnoop): retval = 1'b1; + default: retval = 1'b0; + endcase + return retval; + endfunction + + function automatic logic ar_is_coherent(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + logic retval; + unique case (1'b1) + is_read_once(arbar0, ardomain, arsnoop): retval = 1'b1; + is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; + is_read_clean(arbar0, ardomain, arsnoop): retval = 1'b1; + is_read_not_shared_dirty(arbar0, ardomain, 
arsnoop): retval = 1'b1; + is_read_unique(arbar0, ardomain, arsnoop): retval = 1'b1; + is_clean_unique(arbar0, ardomain, arsnoop): retval = 1'b1; + is_make_unique(arbar0, ardomain, arsnoop): retval = 1'b1; + default: retval = 1'b0; + endcase + return retval; + endfunction + + function automatic logic ar_is_cache_maintenance(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + logic retval; + unique case (1'b1) + is_clean_shared(arbar0, ardomain, arsnoop): retval = 1'b1; + is_clean_invalid(arbar0, ardomain, arsnoop): retval = 1'b1; + is_make_invalid(arbar0, ardomain, arsnoop): retval = 1'b1; + default: retval = 1'b0; + endcase + return retval; + endfunction + + // Snoop transaction from initiating master transaction + function automatic acsnoop_t ar_acsnoop_map(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop, logic arlock); + acsnoop_t acsnoop; + unique case (1'b1) + is_clean_unique(arbar0, ardomain, arsnoop): acsnoop = acsnoop_t'(CleanInvalid); + is_make_unique(arbar0, ardomain, arsnoop): acsnoop = acsnoop_t'(MakeInvalid); + default: acsnoop = acsnoop_t'(arsnoop); + endcase + // Hacky way to support AMOs in Culsans with the legacy WB cache + if (arlock && is_read_once(arbar0, ardomain, arsnoop)) acsnoop = acsnoop_t'(CleanInvalid); + return acsnoop; + endfunction + + function automatic acsnoop_t aw_acsnoop_map(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); // AWSNOOP-typed, matching the is_write_* predicates it is forwarded to + acsnoop_t acsnoop; + unique case (1'b1) + is_write_unique(awbar0, awdomain, awsnoop): acsnoop = acsnoop_t'(CleanInvalid); + is_write_line_unique(awbar0, awdomain, awsnoop): acsnoop = acsnoop_t'(MakeInvalid); + default: acsnoop = acsnoop_t'(CleanInvalid); // every other write maps to CleanInvalid + endcase + return acsnoop; + endfunction + + function automatic logic ar_resp_accepts_dirty(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + logic retval; + unique case (1'b1) + is_read_not_shared_dirty(arbar0, ardomain, arsnoop): retval = 1'b1; + is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; + is_read_unique(arbar0,
ardomain, arsnoop): retval = 1'b1; + default: retval = 1'b0; + endcase + return retval; + endfunction + + function automatic logic ar_resp_accepts_dirty_shared(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + logic retval; + unique case (1'b1) + is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; + default: retval = 1'b0; + endcase + return retval; + endfunction + + function automatic logic ar_resp_accepts_shared(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + logic retval; + unique case (1'b1) + is_read_not_shared_dirty(arbar0, ardomain, arsnoop): retval = 1'b1; + is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; + is_read_clean(arbar0, ardomain, arsnoop): retval = 1'b1; + default: retval = 1'b0; + endcase + return retval; + endfunction endpackage diff --git a/src/ace_snoop_cut.sv b/src/ace_snoop_cut.sv new file mode 100644 index 0000000..3ea54d8 --- /dev/null +++ b/src/ace_snoop_cut.sv @@ -0,0 +1,160 @@ +// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Riccardo Tedeschi + +/// An ACE4 snoop cut. +/// +/// Breaks all combinatorial paths between its input and output. 
+module ace_snoop_cut #( + // bypass enable + parameter bit Bypass = 1'b0, + parameter bit BypassAc = Bypass, + parameter bit BypassCr = Bypass, + parameter bit BypassCd = Bypass, + // ACE snoop channel structs + parameter type ac_chan_t = logic, + parameter type cd_chan_t = logic, + parameter type cr_chan_t = logic, + // ACE snoop request & response structs + parameter type snoop_req_t = logic, + parameter type snoop_resp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + // slave port + input snoop_req_t slv_req_i, + output snoop_resp_t slv_resp_o, + // master port + output snoop_req_t mst_req_o, + input snoop_resp_t mst_resp_i +); + + // Snoop channels cut + spill_register #( + .T (ac_chan_t), + .Bypass(BypassAc) + ) i_reg_ac ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .valid_i(slv_req_i.ac_valid), + .ready_o(slv_resp_o.ac_ready), + .data_i (slv_req_i.ac), + .valid_o(mst_req_o.ac_valid), + .ready_i(mst_resp_i.ac_ready), + .data_o (mst_req_o.ac) + ); + + spill_register #( + .T (cd_chan_t), + .Bypass(BypassCd) + ) i_reg_cd ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .valid_i(mst_resp_i.cd_valid), + .ready_o(mst_req_o.cd_ready), + .data_i (mst_resp_i.cd), + .valid_o(slv_resp_o.cd_valid), + .ready_i(slv_req_i.cd_ready), + .data_o (slv_resp_o.cd) + ); + + spill_register #( + .T (cr_chan_t), + .Bypass(BypassCr) + ) i_reg_cr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .valid_i(mst_resp_i.cr_valid), + .ready_o(mst_req_o.cr_ready), + .data_i (mst_resp_i.cr_resp), + .valid_o(slv_resp_o.cr_valid), + .ready_i(slv_req_i.cr_ready), + .data_o (slv_resp_o.cr_resp) + ); + +endmodule + +`include "ace/assign.svh" +`include "ace/typedef.svh" + +// interface wrapper +module ace_snoop_cut_intf #( + // Bypass enable + parameter bit BYPASS = 1'b0, + parameter bit BYPASS_AC = BYPASS, + parameter bit BYPASS_CR = BYPASS, + parameter bit BYPASS_CD = BYPASS, + // The address width. + parameter int unsigned ADDR_WIDTH = 0, + // The data width.
+ parameter int unsigned DATA_WIDTH = 0 +) ( + input logic clk_i, + input logic rst_ni, + SNOOP_BUS.Slave in, + SNOOP_BUS.Master out +); + + typedef logic [ADDR_WIDTH-1:0] addr_t; + typedef logic [DATA_WIDTH-1:0] data_t; + + `SNOOP_TYPEDEF_ALL(snoop, addr_t, data_t) + + snoop_req_t slv_req, mst_req; + snoop_resp_t slv_resp, mst_resp; + + `SNOOP_ASSIGN_TO_REQ(slv_req, in) + `SNOOP_ASSIGN_FROM_RESP(in, slv_resp) + + `SNOOP_ASSIGN_FROM_REQ(out, mst_req) + `SNOOP_ASSIGN_TO_RESP(mst_resp, out) + + ace_snoop_cut #( + .Bypass (BYPASS), + .BypassAc (BYPASS_AC), + .BypassCr (BYPASS_CR), + .BypassCd (BYPASS_CD), + .ac_chan_t (snoop_ac_chan_t), + .cd_chan_t (snoop_cd_chan_t), + .cr_chan_t (snoop_cr_chan_t), + .snoop_req_t (snoop_req_t), + .snoop_resp_t(snoop_resp_t) + ) i_ace_snoop_cut ( + .clk_i, + .rst_ni, + .slv_req_i (slv_req), + .slv_resp_o(slv_resp), + .mst_req_o (mst_req), + .mst_resp_i(mst_resp) + ); + + // Check the invariants. + // pragma translate_off +`ifndef VERILATOR + initial begin + assert (ADDR_WIDTH > 0) + else $fatal(1, "Wrong addr width parameter"); + assert (DATA_WIDTH > 0) + else $fatal(1, "Wrong data width parameter"); + assert (in.SNOOP_ADDR_WIDTH == ADDR_WIDTH) + else $fatal(1, "Wrong interface definition"); + assert (in.SNOOP_DATA_WIDTH == DATA_WIDTH) + else $fatal(1, "Wrong interface definition"); + assert (out.SNOOP_ADDR_WIDTH == ADDR_WIDTH) + else $fatal(1, "Wrong interface definition"); + assert (out.SNOOP_DATA_WIDTH == DATA_WIDTH) + else $fatal(1, "Wrong interface definition"); + end +`endif + // pragma translate_on +endmodule diff --git a/src/ace_trs_dec.sv b/src/ace_trs_dec.sv deleted file mode 100644 index ecb2442..0000000 --- a/src/ace_trs_dec.sv +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2019 ETH Zurich and University of Bologna. 
-// Copyright (c) 2022 PlanV GmbH -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// - -module ace_trs_dec -#( - parameter type slv_ace_req_t = logic -) ( - // incoming request from master - input slv_ace_req_t slv_reqs_i, - // Write transaction shareable - output logic snoop_aw_trs, - // Read transaction shareable - output logic snoop_ar_trs -); - -/// Types of transactions bypassing CCU -logic write_back, write_no_snoop, read_no_snoop; - -assign write_back = (slv_reqs_i.aw.snoop == 'b011) && (slv_reqs_i.aw.bar[0] == 'b0) && - ((slv_reqs_i.aw.domain == 'b00) || (slv_reqs_i.aw.domain == 'b01) || - (slv_reqs_i.aw.domain == 'b10)); - -assign write_no_snoop = (slv_reqs_i.aw.snoop == 'b000) && (slv_reqs_i.aw.bar[0] == 'b0) && - ((slv_reqs_i.aw.domain == 'b00) || (slv_reqs_i.aw.domain == 'b11) ); -assign read_no_snoop = (slv_reqs_i.ar.snoop == 'b0000) && (slv_reqs_i.ar.bar[0] =='b0) && - ((slv_reqs_i.ar.domain == 'b00) || (slv_reqs_i.ar.domain == 'b11) ); - -assign snoop_aw_trs = ~(write_back | write_no_snoop); -assign snoop_ar_trs = ~(read_no_snoop); - -endmodule diff --git a/src/ccu/ace_ccu_ax_arbiter.sv b/src/ccu/ace_ccu_ax_arbiter.sv new file mode 100644 index 0000000..9825e87 --- /dev/null +++ b/src/ccu/ace_ccu_ax_arbiter.sv @@ -0,0 +1,175 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 
(the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +`include "axi/assign.svh" + +module ace_ccu_ax_arbiter + import ace_pkg::*; + import ace_ccu_pkg::*; +#( + parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, + parameter type ccu_aw_t = logic, + parameter type ccu_ar_t = logic, + parameter type ccu_ax_t = logic +) ( + input logic clk_i, + input logic rst_ni, + + input logic replay_full_i, + + input ccu_aw_t aw_i, + input logic aw_valid_i, + output logic aw_ready_o, + input ccu_ar_t ar_i, + input logic ar_valid_i, + output logic ar_ready_o, + input ccu_ar_t replay_ar_i, + input logic replay_ar_valid_i, + output logic replay_ar_ready_o, + + output ccu_ax_t ax_o, + output logic ax_valid_o, + input logic ax_ready_i, + output logic ax_is_write_o, + output logic ax_is_replay_o, + output acsnoop_t ax_acsnoop_o, + output logic ar_accepts_dirty_o, + output logic ar_accepts_dirty_shared_o, + output logic ar_accepts_shared_o, + output axdomain_t ax_domain_o +); + + // Internal signals + // {{{ + ccu_ar_t ar_muxed; + ccu_ax_t aw_in; + ccu_ax_t ar_in; + ccu_ax_t ax; + ccu_ax_t replay_ar; + logic ax_valid; + logic ax_ready; + logic ax_arb_valid; + logic ax_arb_ready; + logic ax_is_write; + acsnoop_t aw_acsnoop; + acsnoop_t ar_acsnoop; + logic ar_accepts_dirty; + logic ar_accepts_dirty_shared; + logic ar_accepts_shared; + // }}} + + // Coherence decoding (read-side values use the replayed AR when ax_is_replay_o is set) + // {{{ + assign ar_muxed = ax_is_replay_o ? 
replay_ar_i : ar_i; + // ACSNOOP computed from AWSNOOP + assign aw_acsnoop = aw_acsnoop_map(aw_i.bar[0], aw_i.domain, aw_i.snoop); + // ACSNOOP computed from ARSNOOP + assign ar_acsnoop = ar_acsnoop_map( + ar_muxed.bar[0], ar_muxed.domain, ar_muxed.snoop, ar_muxed.lock + ); + // Read transaction can accept a cacheline in Dirty state + assign ar_accepts_dirty = ar_resp_accepts_dirty( + ar_muxed.bar[0], ar_muxed.domain, ar_muxed.snoop + ); + // Read transaction can accept a cacheline in Dirty and Shared state + assign ar_accepts_dirty_shared = ar_resp_accepts_dirty_shared( + ar_muxed.bar[0], ar_muxed.domain, ar_muxed.snoop + ); + // Read transaction can accept a cacheline in Shared state + assign ar_accepts_shared = ar_resp_accepts_shared( + ar_muxed.bar[0], ar_muxed.domain, ar_muxed.snoop + ); + // Mux output signals: defaults come from the AR path and are overridden below for a non-replayed AW + always_comb begin : output_mux + // AR request (input or replay) + ax_is_write_o = 1'b0; + ax_acsnoop_o = ar_acsnoop; + ar_accepts_dirty_o = ar_accepts_dirty; + ar_accepts_dirty_shared_o = ar_accepts_dirty_shared; + ar_accepts_shared_o = ar_accepts_shared; + ax_domain_o = ar_muxed.domain; + + if (!ax_is_replay_o && ax_is_write) begin + // AW request (input) + ax_is_write_o = 1'b1; + ax_acsnoop_o = aw_acsnoop; + ax_domain_o = aw_i.domain; + end + end + // }}} + + // Input AX arbiter (AW vs. AR); its output is stalled while replay_full_i is set + // {{{ + // Assign AW to internal AX data type + always_comb begin + aw_in = '0; + `AXI_SET_AW_STRUCT(aw_in, aw_i) + end + + // Assign AR to internal AX data type + always_comb begin + ar_in = '0; + `AXI_SET_AR_STRUCT(ar_in, ar_i) + end + + rr_arb_tree #( + .NumIn (2), + .DataType (ccu_ax_t), + .AxiVldRdy(1'b1), + .LockIn (1'b1) + ) u_ax_arbiter ( + .clk_i, + .rst_ni, + .flush_i(1'b0), + .rr_i (1'b0), + .req_i ({aw_valid_i, ar_valid_i}), + .gnt_o ({aw_ready_o, ar_ready_o}), + .data_i ({aw_in, ar_in}), + .req_o (ax_arb_valid), + .gnt_i (ax_arb_ready), + .data_o (ax), + .idx_o (ax_is_write) + ); + + assign ax_valid = !replay_full_i && ax_arb_valid; + assign ax_arb_ready = 
!replay_full_i && ax_ready; + // }}} + + // Replay arbiter (replayed AR vs. arbitrated input AX); idx_o drives ax_is_replay_o + // {{{ + // Assign replay AR to internal AX data type + always_comb begin + replay_ar = '0; + `AXI_SET_AR_STRUCT(replay_ar, replay_ar_i) + end + + rr_arb_tree #( + .NumIn (2), + .DataType (ccu_ax_t), + .AxiVldRdy(1'b1), + .LockIn (1'b0), + .ExtPrio (1'b1) + ) u_replay_arbiter ( + .clk_i, + .rst_ni, + .flush_i(1'b0), + .rr_i ('1), + .req_i ({replay_ar_valid_i, ax_valid}), + .gnt_o ({replay_ar_ready_o, ax_ready}), + .data_i ({replay_ar, ax}), + .req_o (ax_valid_o), + .gnt_i (ax_ready_i), + .data_o (ax_o), + .idx_o (ax_is_replay_o) + ); + // }}} + +endmodule diff --git a/src/ccu/ace_ccu_cd_arbiter.sv b/src/ccu/ace_ccu_cd_arbiter.sv new file mode 100644 index 0000000..3e2bd22 --- /dev/null +++ b/src/ccu/ace_ccu_cd_arbiter.sv @@ -0,0 +1,145 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +module ace_ccu_cd_arbiter + import ace_pkg::*; + import ace_ccu_pkg::*; +#( + parameter ace_ccu_cfg_t CcuCfg = '0, // NOTE(review): sibling CCU modules default this to '{default: '0} - consider aligning + parameter type cd_t = logic +) ( + input logic clk_i, + input logic rst_ni, + + input logic [CcuCfg.u.SlvPorts-1:0] cd_valid_i, + output logic [CcuCfg.u.SlvPorts-1:0] cd_ready_o, + input cd_t [CcuCfg.u.SlvPorts-1:0] cd_i, + + input logic cd_sel_valid_i, + output logic cd_sel_ready_o, + input logic [CcuCfg.u.SlvPorts-1:0] cd_sel_bv_i, + + output logic cd_valid_o, + input logic cd_ready_i, + output cd_t cd_o +); + + logic [ CcuCfg.u.SlvPorts-1:0] cd_sel_fork_out_valid; + logic [ CcuCfg.u.SlvPorts-1:0] cd_sel_fork_out_ready; + logic [ CcuCfg.u.SlvPorts-1:0] cd_last; + logic [ CcuCfg.u.SlvPorts-1:0] cd_join_out_valid; + logic [ CcuCfg.u.SlvPorts-1:0] cd_join_out_ready; + logic [ CcuCfg.u.SlvPorts-1:0] cd_drop; + logic [ CcuCfg.u.SlvPorts-1:0] cd_filter_out_valid; + logic [ CcuCfg.u.SlvPorts-1:0] cd_filter_out_ready; + + logic [CcuCfg.SlvPortIdxWidth-1:0] cd_first_resp_d; + logic [CcuCfg.SlvPortIdxWidth-1:0] cd_first_resp_q; + logic [CcuCfg.SlvPortIdxWidth-1:0] cd_first_resp_idx; + logic cd_first_resp_empty; + logic cd_first_resp_valid_d; + logic cd_first_resp_valid_q; + + stream_fork_dynamic #( + .N_OUP(CcuCfg.u.SlvPorts) + ) u_cd_fork ( + .clk_i, + .rst_ni, + .valid_i (cd_sel_valid_i), + .ready_o (cd_sel_ready_o), + .sel_i (cd_sel_bv_i), + .sel_valid_i('1), + .sel_ready_o(), + .valid_o (cd_sel_fork_out_valid), + .ready_i (cd_sel_fork_out_ready & cd_last) + ); + + for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin : gen_cd_filter + + assign cd_last[i] = cd_i[i].last; + + // Only selected channels can advance + stream_join #( + .N_INP(2) + ) u_cd_join ( + .inp_valid_i({cd_sel_fork_out_valid[i], cd_valid_i[i]}), + .inp_ready_o({cd_sel_fork_out_ready[i], cd_ready_o[i]}), + .oup_valid_o(cd_join_out_valid[i]), + .oup_ready_i(cd_join_out_ready[i]) + ); + + // Drop beats of every channel that is not the recorded first responder (see cd_drop) + stream_filter u_cd_filter ( + .valid_i(cd_join_out_valid[i]), + 
.ready_o(cd_join_out_ready[i]), + .drop_i (cd_drop[i]), + .valid_o(cd_filter_out_valid[i]), + .ready_i(cd_filter_out_ready[i]) + ); + + end + + // Select the first responder among the CD channels + stream_mux #( + .N_INP (CcuCfg.u.SlvPorts), + .DATA_T(cd_t) + ) u_cd_mux ( + .inp_data_i (cd_i), + .inp_valid_i(cd_filter_out_valid), + .inp_ready_o(cd_filter_out_ready), + .inp_sel_i (cd_first_resp_d), + .oup_data_o (cd_o), + .oup_valid_o(cd_valid_o), + .oup_ready_i(cd_ready_i) + ); + + lzc #( + .WIDTH(CcuCfg.u.SlvPorts) + ) u_cd_lzc ( + .in_i (cd_join_out_valid), + .cnt_o (cd_first_resp_idx), + .empty_o(cd_first_resp_empty) + ); + + always_comb begin + cd_first_resp_valid_d = cd_first_resp_valid_q; + cd_first_resp_d = cd_first_resp_q; + + if (!cd_first_resp_valid_q && !cd_first_resp_empty) begin + // There is a valid response and the first responder + // has not been found yet + // ~> save the LZC index + cd_first_resp_d = cd_first_resp_idx; + // ~> mark the first responder as valid + cd_first_resp_valid_d = 1'b1; + end + + if (cd_sel_valid_i && cd_sel_ready_o) begin + // All CD channels have been processed + // ~> clean first responder valid + cd_first_resp_valid_d = 1'b0; + end + end + + // Drop all selected CD channels which responded after the first responder + assign cd_drop = cd_first_resp_valid_q ? 
~(CcuCfg.u.SlvPorts'(1) << cd_first_resp_q) : '0; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + cd_first_resp_q <= '0; + cd_first_resp_valid_q <= 1'b0; + end else begin + cd_first_resp_q <= cd_first_resp_d; + cd_first_resp_valid_q <= cd_first_resp_valid_d; + end + end + +endmodule diff --git a/src/ccu/ace_ccu_cd_ctrl.sv b/src/ccu/ace_ccu_cd_ctrl.sv new file mode 100644 index 0000000..7238bcf --- /dev/null +++ b/src/ccu/ace_ccu_cd_ctrl.sv @@ -0,0 +1,235 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +`include "ace/assign.svh" + +module ace_ccu_cd_ctrl + import ace_pkg::*; + import ace_ccu_pkg::*; +#( + parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, + parameter type ccu_ax_t = logic, + parameter type ccu_id_t = logic, + parameter type user_t = logic, + parameter type cd_t = logic, + parameter type slv_bv_t = logic, + parameter type w_t = logic, + parameter type ccu_r_t = logic +) ( + + input logic clk_i, + input logic rst_ni, + + // Ctrl + input logic valid_i, + output logic ready_o, + input ccu_ax_t ax_i, + input logic cd_ctrl_write_i, + input logic cd_ctrl_read_i, + input slv_bv_t cd_bv_i, + input logic r_resp_shared_i, + input logic r_resp_dirty_i, + + // CD snoop channel + input cd_t [CcuCfg.u.SlvPorts-1:0] cd_i, + input logic [CcuCfg.u.SlvPorts-1:0] cd_valid_i, + output logic [CcuCfg.u.SlvPorts-1:0] cd_ready_o, + + // Mst interface + output w_t w_o, + output logic w_valid_o, + input logic w_ready_i, + + // Slv interface + output ccu_r_t r_o, + output logic r_valid_o, + input logic r_ready_i +); + // Typedefs + // {{{ + typedef logic [CcuCfg.CachelineAxiTransfersIdxWidth-1:0] cl_axi_trans_idx_t; + + typedef struct packed { + ccu_id_t id; + logic cd_ctrl_write; + logic cd_ctrl_read; + cl_axi_trans_idx_t r_cd_start_trans; + logic r_resp_shared; + logic r_resp_dirty; + user_t r_user; + slv_bv_t cd_bv; + axi_pkg::len_t r_len; + } cd_ctrl_sync_reg_t; + // }}} + + // Internal signals + // {{{ + cl_axi_trans_idx_t r_cd_start_trans; + cd_ctrl_sync_reg_t cd_ctrl_sync_wdata; + cd_ctrl_sync_reg_t cd_ctrl_sync_rdata; + logic cd_ctrl_sync_valid; + logic cd_ctrl_sync_ready; + logic cd_valid; + logic cd_ready; + cd_t cd; + logic r_drop; + logic r_done_q; + logic r_done_d; + logic cd_trans_cnt_clr; + logic cd_trans_cnt_en; + cl_axi_trans_idx_t cd_trans_cnt; + logic r_len_cnt_clr; + logic r_len_cnt_en; + axi_pkg::len_t r_len_cnt; + // }}} + + // Input handshake decoupling + // {{{ + if (CcuCfg.CachelineAxiTransfers == 1) begin : gen_axi_start_trans_eqsize + assign 
r_cd_start_trans = '0; + end else begin : gen_axi_start_trans_eqsize + assign r_cd_start_trans = + ax_i.addr[CcuCfg.CachelineBytesIdxWidth-1:CcuCfg.AxiDataBytesIdxWidth]; + end + + assign cd_ctrl_sync_wdata = '{ + id: ax_i.id, + cd_ctrl_write: cd_ctrl_write_i, + cd_ctrl_read: cd_ctrl_read_i, + cd_bv: cd_bv_i, + r_cd_start_trans: r_cd_start_trans, + r_resp_shared: r_resp_shared_i, + r_resp_dirty: r_resp_dirty_i, + r_user: ax_i.user, + r_len: ax_i.len + }; + + fall_through_register #( + .T(cd_ctrl_sync_reg_t) + ) u_cd_ctrl_sync_reg ( + .clk_i, + .rst_ni, + .clr_i (1'b0), + .testmode_i(1'b0), + .data_i (cd_ctrl_sync_wdata), + .valid_i (valid_i), + .ready_o (ready_o), + .data_o (cd_ctrl_sync_rdata), + .valid_o (cd_ctrl_sync_valid), + .ready_i (cd_ctrl_sync_ready) + ); + // }}} + + // CD responses arbiter + // {{{ + ace_ccu_cd_arbiter #( + .CcuCfg(CcuCfg), + .cd_t (cd_t) + ) u_cd_merge ( + .clk_i, + .rst_ni, + .cd_valid_i (cd_valid_i), + .cd_ready_o (cd_ready_o), + .cd_i (cd_i), + .cd_sel_valid_i(cd_ctrl_sync_valid), + .cd_sel_ready_o(cd_ctrl_sync_ready), + .cd_sel_bv_i (cd_ctrl_sync_rdata.cd_bv), + .cd_valid_o (cd_valid), + .cd_ready_i (cd_ready), + .cd_o (cd) + ); + // }}} + + // CD forking (NOTE(review): cd_sel_write and cd_sel_read are not in the internal-signals list and rely on implicit nets) + // {{{ + assign cd_sel_write = cd_ctrl_sync_rdata.cd_ctrl_write; + + assign cd_sel_read = cd_ctrl_sync_rdata.cd_ctrl_read && ~|{ + // Drop the first transfers if not needed + r_drop, + // Drop remaining transfers due to reduced transfer len + r_done_q}; + + stream_fork_dynamic #( + .N_OUP(2) + ) u_cd_fork ( + .clk_i, + .rst_ni, + .valid_i (cd_valid), + .ready_o (cd_ready), + .sel_i ({cd_sel_write, cd_sel_read}), + .sel_valid_i('1), + .sel_ready_o(), + .valid_o ({w_valid_o, r_valid_o}), + .ready_i ({w_ready_i, r_ready_i}) + ); + // }}} + + // R channel (NOTE(review): r_last is also undeclared, and r_o.last is driven by cd.last rather than r_last - confirm for reads with reduced transfer len) + // {{{ + counter #( + .WIDTH(CcuCfg.CachelineAxiTransfersIdxWidth) + ) u_cd_trans_counter ( + .clk_i, + .rst_ni, + .clear_i (cd_trans_cnt_clr), + .en_i (cd_trans_cnt_en), + .load_i (1'b0), + .down_i (1'b0), + .d_i ('0), + .q_o 
(cd_trans_cnt), + .overflow_o() + ); + + counter #( + .WIDTH($bits(axi_pkg::len_t)) + ) u_r_len_counter ( + .clk_i, + .rst_ni, + .clear_i (r_len_cnt_clr), + .en_i (r_len_cnt_en), + .load_i ('0), + .down_i ('0), + .d_i ('0), + .q_o (r_len_cnt), + .overflow_o() + ); + + assign r_drop = cd_trans_cnt != cd_ctrl_sync_rdata.r_cd_start_trans; + assign cd_trans_cnt_en = cd_valid && cd_ready && r_drop; + assign cd_trans_cnt_clr = cd_valid && cd_ready && cd.last; + + assign r_last = r_len_cnt == cd_ctrl_sync_rdata.r_len; + assign r_len_cnt_en = r_valid_o && r_ready_i; + assign r_len_cnt_clr = cd_valid && cd_ready && cd.last; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) r_done_q <= 1'b0; + else r_done_q <= r_done_d; + end + + assign r_done_d = !r_len_cnt_clr && ((r_last && r_len_cnt_en) || r_done_q); + + assign r_o = '{ + id: cd_ctrl_sync_rdata.id, + data: cd.data, + resp: {cd_ctrl_sync_rdata.r_resp_shared, cd_ctrl_sync_rdata.r_resp_dirty, 2'b0}, + last: cd.last, + user: cd_ctrl_sync_rdata.r_user + }; + // }}} + + // W channel + // {{{ + assign w_o = '{data: cd.data, strb: '1, last: cd.last, user: '0}; + // }}} + +endmodule diff --git a/src/ccu/ace_ccu_frontend.sv b/src/ccu/ace_ccu_frontend.sv new file mode 100644 index 0000000..f205060 --- /dev/null +++ b/src/ccu/ace_ccu_frontend.sv @@ -0,0 +1,256 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. + +module ace_ccu_frontend + import ace_pkg::*; + import ace_ccu_pkg::*; +#( + parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, + parameter type slv_bv_t = logic, + parameter type slv_idx_t = logic, + parameter type slv_aw_t = logic, + parameter type w_t = logic, + parameter type slv_b_t = logic, + parameter type slv_ar_t = logic, + parameter type slv_r_t = logic, + parameter type slv_req_t = logic, + parameter type slv_resp_t = logic, + parameter type ccu_aw_t = logic, + parameter type ccu_b_t = logic, + parameter type ccu_ar_t = logic, + parameter type ccu_r_t = logic, + parameter type ccu_req_t = logic, + parameter type ccu_resp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + + input slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_req_i, + output slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_resp_o, + + output ccu_req_t ccu_nonshareable_req_o, + input ccu_resp_t ccu_nonshareable_resp_i, + output ccu_req_t ccu_shareable_req_o, + input ccu_resp_t ccu_shareable_resp_i, + + output slv_bv_t ccu_shareable_rack_o, + output slv_bv_t ccu_shareable_wack_o +); + + // Internal signals + // {{{ + slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_req_cut; + slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_resp_cut; + + slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_nonshareable_req; + slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_nonshareable_resp; + slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_shareable_req; + slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_shareable_resp; + + slv_bv_t slv_r_nonshareable; + slv_bv_t slv_b_nonshareable; + slv_bv_t slv_rack_nonshareable; + slv_bv_t slv_wack_nonshareable; + // }}} + + // Slv demuxes + // {{{ + + // Demux slv traffic into blocking and non-blocking traffic + // Non-blocking traffic is expected to proceed even when the snoop + // interface is stalling + + for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin : gen_slv_demux + + logic aw_is_nonblocking; + logic ar_is_read_no_snoop; + + 
ace_cut #( + .BypassAw (!CcuCfg.u.CutSlvReq), + .BypassW (!CcuCfg.u.CutSlvReq), + .BypassB (!CcuCfg.u.CutSlvResp), + .BypassAr (!CcuCfg.u.CutSlvReq), + .BypassR (!CcuCfg.u.CutSlvResp), + .BypassAck (1'b1), + .aw_chan_t (slv_aw_t), + .w_chan_t (w_t), + .b_chan_t (slv_b_t), + .ar_chan_t (slv_ar_t), + .r_chan_t (slv_r_t), + .ace_req_t (slv_req_t), + .ace_resp_t(slv_resp_t) + ) u_ace_cut ( + .clk_i, + .rst_ni, + .slv_req_i (slv_req_i[i]), + .slv_resp_o(slv_resp_o[i]), + .mst_req_o (slv_req_cut[i]), + .mst_resp_i(slv_resp_cut[i]) + ); + + // Separate in each port blocking and non-blocking traffic + assign aw_is_nonblocking = aw_is_non_blocking( + slv_req_cut[i].aw.bar[0], slv_req_cut[i].aw.domain, slv_req_cut[i].aw.snoop + ); + + assign ar_is_read_no_snoop = is_read_no_snoop( + slv_req_cut[i].ar.bar[0], slv_req_cut[i].ar.domain, slv_req_cut[i].ar.snoop + ); + + axi_demux_simple #( + .AxiIdWidth (CcuCfg.u.AxiSlvIdWidth), + .AtopSupport(1'b1), + .axi_req_t (slv_req_t), + .axi_resp_t (slv_resp_t), + .NoMstPorts (2), + .MaxTrans (CcuCfg.u.MaxTransactions), + .AxiLookBits(CcuCfg.u.AxiIdLookupBits), + .UniqueIds (CcuCfg.u.AxiUniqueIds) + ) u_ace_demux ( + .clk_i, + .rst_ni, + .test_i (1'b0), + .slv_req_i (slv_req_cut[i]), + .slv_resp_o (slv_resp_cut[i]), + .slv_aw_select_i(aw_is_nonblocking), + .slv_ar_select_i(ar_is_read_no_snoop), + .mst_reqs_o ({slv_nonshareable_req[i], slv_shareable_req[i]}), + .mst_resps_i ({slv_nonshareable_resp[i], slv_shareable_resp[i]}), + .mst_b_idx_o (slv_b_nonshareable[i]), + .mst_r_idx_o (slv_r_nonshareable[i]) + ); + end + // }}} + + // Nonshareable mux + // {{{ + axi_mux #( + .SlvAxiIDWidth(CcuCfg.u.AxiSlvIdWidth), + .slv_aw_chan_t(slv_aw_t), + .mst_aw_chan_t(ccu_aw_t), + .w_chan_t (w_t), + .slv_b_chan_t (slv_b_t), + .mst_b_chan_t (ccu_b_t), + .slv_ar_chan_t(slv_ar_t), + .mst_ar_chan_t(ccu_ar_t), + .slv_r_chan_t (slv_r_t), + .mst_r_chan_t (ccu_r_t), + .slv_req_t (slv_req_t), + .slv_resp_t (slv_resp_t), + .mst_req_t (ccu_req_t), + .mst_resp_t 
(ccu_resp_t), + .NoSlvPorts (CcuCfg.u.SlvPorts), + .MaxWTrans (32'd8), + .FallThrough (1'b1), + .SpillAw (1'b0), + .SpillW (1'b0), + .SpillB (1'b0), + .SpillAr (1'b0), + .SpillR (1'b0) + ) u_ace_nonshareable_mux ( + .clk_i, + .rst_ni, + .test_i (1'b0), + .slv_reqs_i (slv_nonshareable_req), + .slv_resps_o(slv_nonshareable_resp), + .mst_req_o (ccu_nonshareable_req_o), + .mst_resp_i (ccu_nonshareable_resp_i) + ); + // }}} + + // Shareable mux + // {{{ + axi_mux #( + .SlvAxiIDWidth(CcuCfg.u.AxiSlvIdWidth), + .slv_aw_chan_t(slv_aw_t), + .mst_aw_chan_t(ccu_aw_t), + .w_chan_t (w_t), + .slv_b_chan_t (slv_b_t), + .mst_b_chan_t (ccu_b_t), + .slv_ar_chan_t(slv_ar_t), + .mst_ar_chan_t(ccu_ar_t), + .slv_r_chan_t (slv_r_t), + .mst_r_chan_t (ccu_r_t), + .slv_req_t (slv_req_t), + .slv_resp_t (slv_resp_t), + .mst_req_t (ccu_req_t), + .mst_resp_t (ccu_resp_t), + .NoSlvPorts (CcuCfg.u.SlvPorts), + .MaxWTrans (32'd8), + .FallThrough (1'b1), + .SpillAw (1'b0), + .SpillW (1'b0), + .SpillB (1'b0), + .SpillAr (1'b0), + .SpillR (1'b0) + ) u_ace_shareable_mux ( + .clk_i, + .rst_ni, + .test_i (1'b0), + .slv_reqs_i (slv_shareable_req), + .slv_resps_o(slv_shareable_resp), + .mst_req_o (ccu_shareable_req_o), + .mst_resp_i (ccu_shareable_resp_i) + ); + // }}} + + // Shareable xacks generation + // {{{ + for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin : gen_xack_fifos + logic r_push, b_push; + + assign r_push = slv_resp_cut[i].r_valid && slv_req_cut[i].r_ready && slv_resp_cut[i].r.last; + assign b_push = slv_resp_cut[i].b_valid && slv_req_cut[i].b_ready; + + fifo_v3 #( + .FALL_THROUGH(1'b0), + .DATA_WIDTH (1), + .DEPTH (CcuCfg.u.MaxTransactions) + ) u_r_tid_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (), + .empty_o (), + .usage_o (), + .data_i (slv_r_nonshareable[i]), + .push_i (r_push), + .data_o (slv_rack_nonshareable[i]), + .pop_i (slv_req_cut[i].rack) + ); + + fifo_v3 #( + .FALL_THROUGH(1'b0), + .DATA_WIDTH (1), + .DEPTH (CcuCfg.u.MaxTransactions) + 
) u_b_tid_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (), + .empty_o (), + .usage_o (), + .data_i (slv_b_nonshareable[i]), + .push_i (b_push), + .data_o (slv_wack_nonshareable[i]), + .pop_i (slv_req_cut[i].wack) + ); + end + + for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin : gen_shareable_xacks + assign ccu_shareable_rack_o[i] = slv_req_i[i].rack && !slv_rack_nonshareable[i]; + assign ccu_shareable_wack_o[i] = slv_req_i[i].wack && !slv_wack_nonshareable[i]; + end + // }}} + +endmodule diff --git a/src/ccu/ace_ccu_pkg.sv b/src/ccu/ace_ccu_pkg.sv new file mode 100644 index 0000000..797bfaa --- /dev/null +++ b/src/ccu/ace_ccu_pkg.sv @@ -0,0 +1,94 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +package ace_ccu_pkg; + + typedef struct packed { + // Number of slv ports + int unsigned SlvPorts; + // Maximum blocking inflight transactions + int unsigned MaxTransactions; + // Shareable W channel buffer size + int unsigned ShareableWFifoDepth; + // Instantiate replay table + bit ReplayEn; + // Address bits to be used during conflict checking + int unsigned NLineWidth; + // AXI/ACE parameters + int unsigned AxiAddrWidth; + int unsigned AxiDataWidth; + int unsigned AxiUserWidth; + int unsigned AxiSlvIdWidth; + // Unique IDs are passed to frontend demux + bit AxiUniqueIds; + // Lookup bits in frontend demux + int unsigned AxiIdLookupBits; + // Cache parameters + int unsigned CachelineWidth; + // I/O cuts + bit CutSlvReq; + bit CutSlvResp; + bit CutMstReq; + bit CutMstResp; + bit CutSnoopReq; + bit CutSnoopResp; + } ace_ccu_user_cfg_t; + + typedef struct packed { + // User parameters + ace_ccu_user_cfg_t u; + // Derived parameters (filled in by ace_ccu_build_cfg) + int unsigned SlvPortIdxWidth; + int unsigned TransactionIdxWidth; + int unsigned CachelineBytes; + int unsigned CachelineBytesIdxWidth; + int unsigned CachelineAddrWidth; + int unsigned CachelineAxiTransfers; + int unsigned CachelineAxiTransfersIdxWidth; + int unsigned AxiCcuIdWidth; + int unsigned AxiDataBytes; + int unsigned AxiDataBytesIdxWidth; + int unsigned AxiStrbWidth; + int unsigned AxiMstIdWidth; + int unsigned ArIdCounters; + } ace_ccu_cfg_t; + + function automatic ace_ccu_cfg_t ace_ccu_build_cfg(ace_ccu_user_cfg_t u); + ace_ccu_cfg_t p; + + p.u = u; + + p.SlvPortIdxWidth = $clog2(u.SlvPorts); + p.TransactionIdxWidth = $clog2(u.MaxTransactions); + p.CachelineBytes = u.CachelineWidth / 8; + p.CachelineBytesIdxWidth = $clog2(p.CachelineBytes); + p.CachelineAddrWidth = u.AxiAddrWidth - p.CachelineBytesIdxWidth; + p.CachelineAxiTransfers = u.CachelineWidth / u.AxiDataWidth; + p.CachelineAxiTransfersIdxWidth = $clog2(p.CachelineAxiTransfers); + p.AxiDataBytes = u.AxiDataWidth / 8; + p.AxiDataBytesIdxWidth = 
$clog2(p.AxiDataBytes); + p.AxiStrbWidth = u.AxiDataWidth / 8; + p.AxiCcuIdWidth = u.AxiSlvIdWidth + p.SlvPortIdxWidth; + p.AxiMstIdWidth = p.AxiCcuIdWidth + 1; // NOTE(review): p.ArIdCounters is never assigned in this function - confirm whether it should be derived here + + return p; + endfunction + + // Typedefs + + // CD ctrl structure + typedef struct packed { + logic read; + logic write; + logic drop; + } cd_sel_t; + +endpackage diff --git a/src/ccu/ace_ccu_read.sv b/src/ccu/ace_ccu_read.sv new file mode 100644 index 0000000..020674c --- /dev/null +++ b/src/ccu/ace_ccu_read.sv @@ -0,0 +1,101 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +`include "axi/assign.svh" +`include "ace/convert.svh" + +module ace_ccu_read + import ace_pkg::*; + import ace_ccu_pkg::*; +#( + parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, + parameter type ccu_ax_t = logic, + parameter type tid_t = logic, + parameter type ccu_axi_ar_t = logic, + parameter type ccu_axi_r_t = logic, + parameter type ccu_ace_r_t = logic +) ( + input logic clk_i, + input logic rst_ni, + + // Ctrl + input logic valid_i, + output logic ready_o, + input ccu_ax_t ax_i, + + // Snp interface + input ccu_ace_r_t cd_r_i, + input logic cd_r_valid_i, + output logic cd_r_ready_o, + + // Slv interface + output ccu_ace_r_t r_o, + output logic r_valid_o, + input logic r_ready_i, + // Mst interface + output ccu_axi_ar_t ar_o, + output logic ar_valid_o, + input logic ar_ready_i, + input ccu_axi_r_t r_i, + input logic r_valid_i, + output logic r_ready_o +); + + ccu_axi_ar_t ar_sync_wdata; + ccu_ace_r_t mem_r; + + // AR channel: ax_i is assigned to the AXI AR struct and buffered in a fall-through register + // {{{ + `AXI_ASSIGN_AR_STRUCT(ar_sync_wdata, ax_i) + + fall_through_register #( + .T(ccu_axi_ar_t) + ) u_ar_sync_reg ( + .clk_i, + .rst_ni, + .clr_i (1'b0), + .testmode_i(1'b0), + .valid_i (valid_i), + .ready_o (ready_o), + .data_i (ar_sync_wdata), + .valid_o (ar_valid_o), + .ready_i (ar_ready_i), + .data_o (ar_o) + ); + // }}} + + // R channel: arbitrate between memory R beats (assigned to the ACE R struct) and CD-sourced R beats + // {{{ + `AXI_TO_ACE_ASSIGN_R_STRUCT(mem_r, r_i) + + rr_arb_tree #( + .NumIn (2), + .DataType (ccu_ace_r_t), + .AxiVldRdy(1'b1), + .LockIn (1'b1) + ) u_r_arbiter ( + .clk_i, + .rst_ni, + .flush_i(1'b0), + .rr_i (1'b0), + .req_i ({r_valid_i, cd_r_valid_i}), + .gnt_o ({r_ready_o, cd_r_ready_o}), + .data_i ({mem_r, cd_r_i}), + .req_o (r_valid_o), + .gnt_i (r_ready_i), + .data_o (r_o), + .idx_o () + ); + // }}} + + + +endmodule diff --git a/src/ccu/ace_ccu_snoop_pipe.sv b/src/ccu/ace_ccu_snoop_pipe.sv new file mode 100644 index 0000000..775d47f --- /dev/null +++ b/src/ccu/ace_ccu_snoop_pipe.sv @@ -0,0 +1,335 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related 
rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Two-stage snoop pipeline of the CCU.
+//
+// Stage 0 arbitrates between AW, AR and replayed AR requests, checks the
+// selected request against the tracker (and, for reads, the replay table) and,
+// when it is clear to go, emits the snoop request on AC while allocating a
+// tracker entry.
+// Stage 1 joins the registered request with the merged snoop response (CR) and
+// dispatches it to the write unit, the read unit and/or the CD control unit.
+module ace_ccu_snoop_pipe
+  import ace_pkg::*;
+  import ace_ccu_pkg::*;
+#(
+  parameter ace_ccu_cfg_t CcuCfg = '{default: '0},
+  parameter type domain_rule_t = logic,
+  parameter type ccu_aw_t = logic,
+  parameter type ccu_ar_t = logic,
+  parameter type ccu_ax_t = logic,
+  parameter type ccu_id_t = logic,
+  parameter type ac_t = logic,
+  parameter type cr_t = logic,
+  parameter type slv_bv_t = logic,
+  parameter type slv_idx_t = logic,
+  parameter type tid_t = logic,
+  parameter type nline_t = logic
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  // Shareable write requests
+  input ccu_aw_t st0_aw_i,
+  input logic st0_aw_valid_i,
+  output logic st0_aw_ready_o,
+
+  // Shareable read requests
+  input ccu_ar_t st0_ar_i,
+  input logic st0_ar_valid_i,
+  output logic st0_ar_ready_o,
+
+  // Replayed read requests
+  input ccu_ar_t st0_replay_ar_i,
+  input logic st0_replay_ar_valid_i,
+  output logic st0_replay_ar_ready_o,
+
+  // Snoop request (AC) and bit vector of targeted slave ports
+  output ac_t st0_ac_o,
+  output slv_bv_t st0_ac_bv_o,
+  output logic st0_ac_valid_o,
+  input logic st0_ac_ready_i,
+
+  // Merged snoop response (CR) and bit vector of ports expected to respond
+  input cr_t st1_cr_i,
+  output slv_bv_t st1_cr_bv_o,
+  input logic st1_cr_valid_i,
+  output logic st1_cr_ready_o,
+
+  // Replay table check/alloc interface
+  input logic st0_replay_full_i,
+  output logic st0_replay_check_o,
+  input logic st0_replay_hit_i,
+  output logic st0_replay_alloc_o,
+
+  // Tracker check/alloc interface
+  input logic st0_tracker_full_i,
+  output logic st0_tracker_check_o,
+  input logic st0_tracker_check_hit_i,
+  output logic st0_tracker_alloc_o,
+  output logic st0_tracker_alloc_b_o,
+  output logic st0_tracker_alloc_r_o,
+  output nline_t st0_tracker_alloc_nline_o,
+  output ccu_id_t st0_tracker_alloc_id_o,
+  input tid_t st0_tracker_alloc_tid_i,
+
+  input domain_rule_t [CcuCfg.u.SlvPorts-1:0] st0_domain_rule_i,
+
+  // Stage 1 outputs towards write unit, read unit and CD control unit
+  output ccu_ax_t st1_ax_o,
+  output logic st1_ax_is_write_o,
+  output logic st1_r_resp_shared_o,
+  output logic st1_r_resp_dirty_o,
+  output tid_t st1_ax_tid_o,
+  output logic st1_cd_ctrl_write_o,
+  output logic st1_cd_ctrl_read_o,
+  output logic st1_write_valid_o,
+  input logic st1_write_ready_i,
+  output logic st1_read_valid_o,
+  input logic st1_read_ready_i,
+  output logic st1_cd_ctrl_valid_o,
+  input logic st1_cd_ctrl_ready_i
+);
+  // Typedefs
+  // {{{
+  // Payload registered between stage 0 and stage 1
+  typedef struct packed {
+    logic ax_is_write;
+    logic ar_accepts_dirty;
+    logic ar_accepts_dirty_shared;
+    logic ar_accepts_shared;
+    slv_bv_t cr_bv;
+    tid_t tid;
+    ccu_ax_t ax;
+  } st1_t;
+  // }}}
+
+  // Internal signals
+  // {{{
+  ccu_ax_t st0_ax;
+  logic st0_ax_valid;
+  logic st0_ax_ready;
+  // Explicitly declared: these were previously implicit nets created by the
+  // arbiter port connections
+  logic st0_ax_is_write;
+  logic st0_ax_is_replay;
+  acsnoop_t st0_ax_acsnoop;
+  axdomain_t st0_ax_domain;
+  logic st0_ar_accepts_dirty;
+  logic st0_ar_accepts_dirty_shared;
+  logic st0_ar_accepts_shared;
+  logic st0_pipe_valid;
+  logic st0_pipe_ready;
+  logic st0_stall;
+  logic st0_replay;
+  slv_idx_t st0_slv_idx;
+  st1_t st0_pipe;
+
+  // Replay allocation fields. They are not ports of this module (yet); they
+  // are declared here with their full width so that the assignments below do
+  // not create truncating implicit 1-bit nets.
+  acsnoop_t replay_alloc_acsnoop_o;
+  logic replay_alloc_accepts_dirty_o;
+  logic replay_alloc_accepts_dirty_shared_o;
+  logic replay_alloc_accepts_shared_o;
+
+  st1_t st1;
+  logic st1_valid;
+  logic st1_ready;
+  // Explicitly declared: previously implicit nets of the join/fork connections
+  logic st1_pipe_valid;
+  logic st1_pipe_ready;
+  logic st1_r_resp_shared;
+  logic st1_r_resp_dirty;
+  logic st1_aw_sel;
+  logic st1_ar_sel;
+  logic st1_cd_sel;
+  logic st1_cd_write;
+  logic st1_cd_read;
+  // }}}
+
+  // AX arbiter
+  // {{{
+  ace_ccu_ax_arbiter #(
+    .CcuCfg (CcuCfg),
+    .ccu_aw_t(ccu_aw_t),
+    .ccu_ar_t(ccu_ar_t),
+    .ccu_ax_t(ccu_ax_t)
+  ) u_st0_ax_arbiter (
+    .clk_i,
+    .rst_ni,
+    .replay_full_i (st0_replay_full_i),
+    .aw_i (st0_aw_i),
+    .aw_valid_i (st0_aw_valid_i),
+    .aw_ready_o (st0_aw_ready_o),
+    .ar_i (st0_ar_i),
+    .ar_valid_i (st0_ar_valid_i),
+    .ar_ready_o (st0_ar_ready_o),
+    .replay_ar_i (st0_replay_ar_i),
+    .replay_ar_valid_i (st0_replay_ar_valid_i),
+    .replay_ar_ready_o (st0_replay_ar_ready_o),
+    .ax_o (st0_ax),
+    .ax_valid_o (st0_ax_valid),
+    .ax_ready_i (st0_ax_ready),
+    .ax_is_write_o (st0_ax_is_write),
+    .ax_is_replay_o (st0_ax_is_replay),
+    .ax_acsnoop_o (st0_ax_acsnoop),
+    .ar_accepts_dirty_o (st0_ar_accepts_dirty),
+    .ar_accepts_dirty_shared_o(st0_ar_accepts_dirty_shared),
+    .ar_accepts_shared_o (st0_ar_accepts_shared),
+    .ax_domain_o (st0_ax_domain)
+  );
+  // }}}
+
+  // Stage 0
+  // {{{
+  always_comb begin : stall_comb
+    // Defaults: stall, no replay, no check requested.
+    // Every signal driven in this block needs a default, otherwise the
+    // paths that do not assign it infer a latch.
+    st0_stall = 1'b1;
+    st0_replay = 1'b0;
+    st0_tracker_check_o = 1'b0;
+    st0_replay_check_o = 1'b0;
+
+    if (!st0_tracker_full_i && st0_pipe_ready) begin
+      // Check if there is any conflict on nline or ID (tracker)
+      st0_tracker_check_o = 1'b1;
+      if (st0_ax_is_write) begin
+        // The AX originates from AW
+        if (st0_tracker_check_hit_i) begin
+          // Writes are not replayable
+          // nline conflict cause head of line stalling
+          // Resolution of conflicts happens by draining
+          // the downstream buffers
+        end else begin
+          // The write is clear to go
+          st0_stall = 1'b0;
+        end
+      end else begin
+        // The AX originates from AR
+        // Check also if there is any conflict on ID (replay)
+        st0_replay_check_o = !st0_ax_is_replay;
+        if (!st0_tracker_check_hit_i && !st0_replay_hit_i) begin
+          // No conflict is detected
+          st0_stall = 1'b0;
+        end else if (CcuCfg.u.ReplayEn) begin
+          // Reads are replayable
+          // ID or nline conflict is avoided by putting the request on hold
+          st0_replay = 1'b1;
+        end
+      end
+    end
+  end
+
+  // Handshaking logic
+  // While stalled, the request is only consumed if it moves to the replay table
+  assign st0_ac_valid_o = st0_stall ? 1'b0 : st0_ax_valid;
+  assign st0_ax_ready = st0_stall ? st0_replay : st0_ac_ready_i;
+  // Allocations
+  assign st0_tracker_alloc_o = st0_ax_valid && st0_ax_ready && !st0_replay;
+  // NOTE(review): the ATOP_R_RESP term is OR-ed into the B flag while the R
+  // flag only depends on !st0_ax_is_write; confirm this is intended.
+  assign st0_tracker_alloc_b_o = st0_ax_is_write || st0_ax.atop[axi_pkg::ATOP_R_RESP];
+  assign st0_tracker_alloc_r_o = !st0_ax_is_write;
+  assign st0_tracker_alloc_nline_o = st0_ax.addr[CcuCfg.CachelineBytesIdxWidth+:CcuCfg.u.NLineWidth];
+  assign st0_tracker_alloc_id_o = st0_ax.id;
+  assign st0_replay_alloc_o = st0_ax_valid && st0_ax_ready && st0_replay;
+
+  // The pipeline register is written when the AC handshake completes
+  assign st0_pipe_valid = st0_ac_valid_o && st0_ac_ready_i;
+
+  // The upper ID bits select the domain rule of the requesting slave port
+  assign st0_slv_idx = st0_ax.id[CcuCfg.AxiCcuIdWidth-1 : CcuCfg.u.AxiSlvIdWidth];
+
+  // Select the set of ports to snoop based on the request domain
+  always_comb begin : ac_bv_comb
+    unique case (st0_ax_domain)
+      InnerShareable: st0_ac_bv_o = st0_domain_rule_i[st0_slv_idx].inner;
+      OuterShareable: st0_ac_bv_o = st0_domain_rule_i[st0_slv_idx].outer;
+      System: st0_ac_bv_o = ~st0_domain_rule_i[st0_slv_idx].initiator;
+      default: st0_ac_bv_o = '0;
+    endcase
+  end
+
+  assign st0_ac_o = '{
+    addr: axi_pkg::aligned_addr(st0_ax.addr, CcuCfg.CachelineBytesIdxWidth),
+    snoop: st0_ax_acsnoop,
+    prot: '0
+  };
+
+  assign st0_pipe = '{
+    ax_is_write: st0_ax_is_write,
+    ar_accepts_dirty: st0_ar_accepts_dirty,
+    ar_accepts_dirty_shared: st0_ar_accepts_dirty_shared,
+    ar_accepts_shared: st0_ar_accepts_shared,
+    cr_bv: st0_ac_bv_o,
+    tid: st0_tracker_alloc_tid_i,
+    ax: st0_ax
+  };
+
+  // Replay allocation fields
+  assign replay_alloc_acsnoop_o = st0_ax_acsnoop;
+  assign replay_alloc_accepts_dirty_o = st0_ar_accepts_dirty;
+  assign replay_alloc_accepts_dirty_shared_o = st0_ar_accepts_dirty_shared;
+  assign replay_alloc_accepts_shared_o = st0_ar_accepts_shared;
+  // }}}
+
+  // Stage 1
+  // {{{
+  spill_register #(
+    .T (st1_t),
+    .Bypass(1'b0)
+  ) u_st1_pipe_reg (
+    .clk_i,
+    .rst_ni,
+    .valid_i(st0_pipe_valid),
+    .ready_o(st0_pipe_ready),
+    .data_i (st0_pipe),
+    .valid_o(st1_valid),
+    .ready_i(st1_ready),
+    .data_o (st1)
+  );
+
+  // Wait for both the registered request and the merged snoop response
+  stream_join #(
+    .N_INP(2)
+  ) u_st1_join (
+    .inp_valid_i({st1_valid, st1_cr_valid_i}),
+    .inp_ready_o({st1_ready, st1_cr_ready_o}),
+    .oup_valid_o(st1_pipe_valid),
+    .oup_ready_i(st1_pipe_ready)
+  );
+
+  assign st1_cr_bv_o = st1.cr_bv;
+
+  assign st1_r_resp_shared = st1_cr_i.IsShared;
+  assign st1_r_resp_dirty = st1_cr_i.PassDirty && st1.ar_accepts_dirty;
+
+  // Decide which downstream units take part in the transaction
+  always_comb begin
+    st1_aw_sel = 1'b0;
+    st1_ar_sel = 1'b0;
+    st1_cd_sel = 1'b0;
+
+    st1_cd_write = 1'b0;
+    st1_cd_read = 1'b0;
+
+    if (st1.ax_is_write) begin
+      // The transactions is a shareable write
+      st1_aw_sel = 1'b1;
+      if (st1_cr_i.DataTransfer) begin
+        // A writeback is expected
+        st1_cd_sel = 1'b1;
+        // If dirty data is passed, do a writeback
+        // otherwise CD data will be dropped
+        st1_cd_write = st1_cr_i.PassDirty;
+      end
+    end else begin
+      // The transactions is a shareable read
+      if (st1_cr_i.DataTransfer) begin
+        // A cacheline is expected on CD
+        st1_cd_sel = 1'b1;
+        st1_cd_read = 1'b1;
+        if (st1_cr_i.PassDirty && !st1.ar_accepts_dirty) begin
+          // The cacheline is dirty but the initiator cannot accept it
+          st1_aw_sel = 1'b1;
+          st1_cd_write = 1'b1;
+        end
+      end else begin
+        // The cacheline must be obtained from memory
+        st1_ar_sel = 1'b1;
+      end
+    end
+  end
+
+  stream_fork_dynamic #(
+    .N_OUP(3)
+  ) u_st1_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i (st1_pipe_valid),
+    .ready_o (st1_pipe_ready),
+    .sel_i ({st1_aw_sel, st1_ar_sel, st1_cd_sel}),
+    .sel_valid_i('1),
+    .sel_ready_o(),
+    .valid_o ({st1_write_valid_o, st1_read_valid_o, st1_cd_ctrl_valid_o}),
+    .ready_i ({st1_write_ready_i, st1_read_ready_i, st1_cd_ctrl_ready_i})
+  );
+  // }}}
+
+  // Pipe outputs
+  // {{{
+  assign st1_ax_o = st1.ax;
+  assign st1_ax_is_write_o = st1.ax_is_write;
+  assign st1_r_resp_shared_o = st1_r_resp_shared;
+  assign st1_r_resp_dirty_o = st1_r_resp_dirty;
+  assign st1_cd_ctrl_write_o = st1_cd_write;
+  assign st1_cd_ctrl_read_o = st1_cd_read;
+  assign st1_ax_tid_o = st1.tid;
+  // }}}
+
+endmodule
diff --git a/src/ccu/ace_ccu_top.sv b/src/ccu/ace_ccu_top.sv
new file mode 100644
index
0000000..298e124
--- /dev/null
+++ b/src/ccu/ace_ccu_top.sv
@@ -0,0 +1,713 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+`include "axi/assign.svh"
+`include "axi/typedef.svh"
+`include "ace/assign.svh"
+`include "ace/typedef.svh"
+`include "ace/convert.svh"
+`include "ace/domain.svh"
+
+// Top level of the cache coherency unit (CCU).
+//
+// Connects the frontend, the snoop pipeline, the transaction tracker, the
+// write/read/CD control units and the master-side AXI mux, and places optional
+// cuts on the snoop ports and on the master port.
+module ace_ccu_top
+  import ace_pkg::*;
+  import ace_ccu_pkg::*;
+#(
+  parameter ace_ccu_cfg_t CcuCfg = '{default: '0},
+  parameter type domain_rule_t = logic,
+  parameter type slv_ar_t = logic,
+  parameter type slv_aw_t = logic,
+  parameter type w_t = logic,
+  parameter type slv_b_t = logic,
+  parameter type slv_r_t = logic,
+  parameter type slv_req_t = logic,
+  parameter type slv_resp_t = logic,
+  parameter type mst_ar_t = logic,
+  parameter type mst_aw_t = logic,
+  parameter type mst_b_t = logic,
+  parameter type mst_r_t = logic,
+  parameter type mst_req_t = logic,
+  parameter type mst_resp_t = logic,
+  parameter type snoop_ac_t = logic,
+  parameter type snoop_cr_t = logic,
+  parameter type snoop_cd_t = logic,
+  parameter type snoop_req_t = logic,
+  parameter type snoop_resp_t = logic
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  input slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_req_i,
+  output slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_resp_o,
+
+  input domain_rule_t [CcuCfg.u.SlvPorts-1:0] domain_rule_i,
+
+  output snoop_req_t [CcuCfg.u.SlvPorts-1:0] snoop_req_o,
+  input snoop_resp_t [CcuCfg.u.SlvPorts-1:0] snoop_resp_i,
+
+  output mst_req_t mst_req_o,
+  input mst_resp_t mst_resp_i
+);
+
+  // Typedefs
+  // {{{
+
+  // AXI/ACE types
+  typedef logic [CcuCfg.u.AxiSlvIdWidth-1:0] slv_id_t;
+  typedef logic [CcuCfg.AxiCcuIdWidth-1:0] ccu_id_t;
+  typedef logic [CcuCfg.AxiMstIdWidth-1:0] mst_id_t;
+  typedef logic [CcuCfg.u.AxiAddrWidth-1:0] addr_t;
+  typedef logic [CcuCfg.u.AxiDataWidth-1:0] data_t;
+  typedef logic [CcuCfg.AxiStrbWidth-1:0] strb_t;
+  typedef logic [CcuCfg.u.AxiUserWidth-1:0] user_t;
+
+  // Intermediate ACE and AXI channel types
+  `ACE_TYPEDEF_AW_CHAN_T(ccu_ace_aw_t, addr_t, ccu_id_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(ccu_ace_b_t, ccu_id_t, user_t)
+  `ACE_TYPEDEF_AR_CHAN_T(ccu_ace_ar_t, addr_t, ccu_id_t, user_t)
+  `ACE_TYPEDEF_R_CHAN_T(ccu_ace_r_t, data_t, ccu_id_t, user_t)
+  `ACE_TYPEDEF_REQ_T(ccu_ace_req_t, ccu_ace_aw_t, w_t, ccu_ace_ar_t)
+  `ACE_TYPEDEF_RESP_T(ccu_ace_resp_t, ccu_ace_b_t, ccu_ace_r_t)
+
+  `AXI_TYPEDEF_AW_CHAN_T(ccu_axi_aw_t, addr_t, ccu_id_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(ccu_axi_b_t, ccu_id_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(ccu_axi_ar_t, addr_t, ccu_id_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(ccu_axi_r_t, data_t, ccu_id_t, user_t)
+  `AXI_TYPEDEF_REQ_T(ccu_axi_req_t, ccu_axi_aw_t, w_t, ccu_axi_ar_t)
+  `AXI_TYPEDEF_RESP_T(ccu_axi_resp_t, ccu_axi_b_t, ccu_axi_r_t)
+
+  // Transaction ID type
+  typedef logic [CcuCfg.TransactionIdxWidth-1:0] tid_t;
+  typedef logic [CcuCfg.u.SlvPorts-1:0] slv_bv_t;
+  typedef logic [CcuCfg.SlvPortIdxWidth-1:0] slv_idx_t;
+  typedef logic [CcuCfg.u.NLineWidth-1:0] nline_t;
+
+  // Internal AW/AR request unified representation
+  typedef struct packed {
+    ccu_id_t id;
+    addr_t addr;
+    axi_pkg::len_t len;
+    axi_pkg::size_t size;
+    axi_pkg::burst_t burst;
+    logic lock;
+    axi_pkg::cache_t cache;
+    axi_pkg::prot_t prot;
+    axi_pkg::qos_t qos;
+    axi_pkg::region_t region;
+    axi_pkg::atop_t atop;
+    user_t user;
+  } ccu_ax_t;
+  // }}}
+
+  // Internal signals
+  // {{{
+  ccu_ace_req_t ccu_nonshareable_req;
+  ccu_ace_resp_t ccu_nonshareable_resp;
+  ccu_ace_req_t ccu_shareable_req;
+  ccu_ace_resp_t ccu_shareable_resp;
+  slv_bv_t ccu_shareable_rack;
+  slv_bv_t ccu_shareable_wack;
+
+  ccu_ace_ar_t replay_ar;
+  logic replay_ar_valid;
+  logic replay_ar_ready;
+
+  snoop_ac_t ac;
+  slv_bv_t ac_bv;
+  logic ac_valid;
+  logic ac_ready;
+  snoop_cr_t cr;
+  slv_bv_t cr_bv;
+  logic cr_valid;
+  logic cr_ready;
+  slv_bv_t snoop_ac_valid;
+  slv_bv_t snoop_ac_ready;
+  snoop_ac_t [CcuCfg.u.SlvPorts-1:0] snoop_ac;
+  slv_bv_t snoop_cr_valid;
+  slv_bv_t snoop_cr_ready;
+  snoop_cr_t [CcuCfg.u.SlvPorts-1:0] snoop_cr;
+  slv_bv_t snoop_cd_valid;
+  slv_bv_t snoop_cd_ready;
+  snoop_cd_t [CcuCfg.u.SlvPorts-1:0] snoop_cd;
+
+  logic replay_full;
+  logic replay_check;
+  logic replay_hit;
+  logic replay_alloc;
+
+  logic tracker_full;
+  logic tracker_check;
+  logic tracker_check_hit;
+  logic tracker_alloc;
+  logic tracker_alloc_b;
+  logic tracker_alloc_r;
+  nline_t tracker_alloc_nline;
+  ccu_id_t tracker_alloc_id;
+  tid_t tracker_alloc_tid;
+  logic tracker_dealloc_r_resp;
+  logic tracker_dealloc_b_resp;
+  logic tracker_dealloc_check_b_resp;
+  ccu_id_t tracker_dealloc_r_resp_id;
+  ccu_id_t tracker_dealloc_b_resp_id;
+  logic tracker_dealloc_b_resp_wb;
+  logic tracker_updt_wb;
+  tid_t tracker_updt_wb_tid;
+
+  ccu_ax_t pipe_ax;
+  logic pipe_ax_is_write;
+  logic pipe_r_resp_shared;
+  logic pipe_r_resp_dirty;
+  slv_bv_t pipe_cd_bv;
+  tid_t pipe_ax_tid;
+  logic pipe_cd_ctrl_write;
+  logic pipe_cd_ctrl_read;
+  logic write_valid;
+  logic write_ready;
+  logic read_valid;
+  logic read_ready;
+  logic cd_ctrl_valid;
+  // Ready of the CD control handshake (connects the snoop pipe and the CD
+  // control unit)
+  logic cd_ctrl_ready;
+
+  w_t write_w;
+  logic write_w_valid;
+  logic write_w_ready;
+
+  w_t cd_w;
+  logic cd_w_valid;
+  logic cd_w_ready;
+  ccu_ace_r_t cd_r;
+  logic cd_r_valid;
+  logic cd_r_ready;
+
+  ccu_axi_req_t axi_shareable_req;
+  ccu_axi_resp_t axi_shareable_resp;
+  ccu_axi_req_t axi_nonshareable_req;
+  ccu_axi_resp_t axi_nonshareable_resp;
+
+  mst_req_t mst_req;
+  mst_resp_t mst_resp;
+
+  // }}}
+
+  // Frontend
+  // {{{
+  ace_ccu_frontend #(
+    .CcuCfg (CcuCfg),
+    .slv_bv_t (slv_bv_t),
+    .slv_idx_t (slv_idx_t),
+    .slv_aw_t (slv_aw_t),
+    .w_t (w_t),
+    .slv_b_t (slv_b_t),
+    .slv_ar_t (slv_ar_t),
+    .slv_r_t (slv_r_t),
+    .slv_req_t (slv_req_t),
+    .slv_resp_t(slv_resp_t),
+    .ccu_aw_t (ccu_ace_aw_t),
+    .ccu_b_t (ccu_ace_b_t),
+    .ccu_ar_t (ccu_ace_ar_t),
+    .ccu_r_t (ccu_ace_r_t),
+    .ccu_req_t (ccu_ace_req_t),
+    .ccu_resp_t(ccu_ace_resp_t)
+  ) u_ace_ccu_frontend (
+    .clk_i,
+    .rst_ni,
+    .slv_req_i,
+    .slv_resp_o,
+    .ccu_nonshareable_req_o (ccu_nonshareable_req),
+    .ccu_nonshareable_resp_i(ccu_nonshareable_resp),
+    .ccu_shareable_req_o (ccu_shareable_req),
+    .ccu_shareable_resp_i (ccu_shareable_resp),
+    .ccu_shareable_rack_o (ccu_shareable_rack),
+    .ccu_shareable_wack_o (ccu_shareable_wack)
+  );
+  // }}}
+
+  // Snoop pipeline
+  // {{{
+  ace_ccu_snoop_pipe #(
+    .CcuCfg (CcuCfg),
+    .domain_rule_t(domain_rule_t),
+    .ccu_ax_t (ccu_ax_t),
+    .ccu_aw_t (ccu_ace_aw_t),
+    .ccu_ar_t (ccu_ace_ar_t),
+    .ccu_id_t (ccu_id_t),
+    .ac_t (snoop_ac_t),
+    .cr_t (snoop_cr_t),
+    .slv_bv_t (slv_bv_t),
+    .slv_idx_t (slv_idx_t),
+    .tid_t (tid_t),
+    .nline_t (nline_t)
+  ) u_ace_ccu_snoop_pipe (
+    .clk_i,
+    .rst_ni,
+    .st0_aw_i (ccu_shareable_req.aw),
+    .st0_aw_valid_i (ccu_shareable_req.aw_valid),
+    .st0_aw_ready_o (ccu_shareable_resp.aw_ready),
+    .st0_ar_i (ccu_shareable_req.ar),
+    .st0_ar_valid_i (ccu_shareable_req.ar_valid),
+    .st0_ar_ready_o (ccu_shareable_resp.ar_ready),
+    .st0_replay_ar_i (replay_ar),
+    .st0_replay_ar_valid_i (replay_ar_valid),
+    .st0_replay_ar_ready_o (replay_ar_ready),
+    .st0_ac_o (ac),
+    .st0_ac_bv_o (ac_bv),
+    .st0_ac_valid_o (ac_valid),
+    .st0_ac_ready_i (ac_ready),
+    .st0_replay_full_i (replay_full),
+    .st0_replay_check_o (replay_check),
+    .st0_replay_hit_i (replay_hit),
+    .st0_replay_alloc_o (replay_alloc),
+    .st0_tracker_full_i (tracker_full),
+    .st0_tracker_check_o (tracker_check),
+    .st0_tracker_check_hit_i (tracker_check_hit),
+    .st0_tracker_alloc_o (tracker_alloc),
+    .st0_tracker_alloc_b_o (tracker_alloc_b),
+    .st0_tracker_alloc_r_o (tracker_alloc_r),
+    .st0_tracker_alloc_nline_o(tracker_alloc_nline),
+    .st0_tracker_alloc_id_o (tracker_alloc_id),
+    .st0_tracker_alloc_tid_i (tracker_alloc_tid),
+    .st0_domain_rule_i (domain_rule_i),
+    .st1_cr_i (cr),
+    .st1_cr_bv_o (cr_bv),
+    .st1_cr_valid_i (cr_valid),
+    .st1_cr_ready_o (cr_ready),
+    .st1_ax_o (pipe_ax),
+    .st1_ax_is_write_o (pipe_ax_is_write),
+    .st1_r_resp_shared_o (pipe_r_resp_shared),
+    .st1_r_resp_dirty_o (pipe_r_resp_dirty),
+    .st1_ax_tid_o (pipe_ax_tid),
+    .st1_cd_ctrl_write_o (pipe_cd_ctrl_write),
+    .st1_cd_ctrl_read_o (pipe_cd_ctrl_read),
+    .st1_write_valid_o (write_valid),
+    .st1_write_ready_i (write_ready),
+    .st1_read_valid_o (read_valid),
+    .st1_read_ready_i (read_ready),
+    .st1_cd_ctrl_valid_o (cd_ctrl_valid),
+    .st1_cd_ctrl_ready_i (cd_ctrl_ready)
+  );
+
+  // Send the snoop request to the ports selected by ac_bv
+  stream_fork_dynamic #(
+    .N_OUP(CcuCfg.u.SlvPorts)
+  ) u_ace_ccu_ac_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i (ac_valid),
+    .ready_o (ac_ready),
+    .sel_i (ac_bv),
+    .sel_valid_i(1'b1),
+    .sel_ready_o(),
+    .valid_o (snoop_ac_valid),
+    .ready_i (snoop_ac_ready)
+  );
+
+  // Every port sees the same AC payload
+  assign snoop_ac = {CcuCfg.u.SlvPorts{ac}};
+
+  // Collect the snoop responses of the ports selected by cr_bv
+  stream_join_dynamic #(
+    .N_INP(CcuCfg.u.SlvPorts)
+  ) u_cr_join (
+    .inp_valid_i(snoop_cr_valid),
+    .inp_ready_o(snoop_cr_ready),
+    .sel_i (cr_bv),
+    .oup_valid_o(cr_valid),
+    .oup_ready_i(cr_ready)
+  );
+
+  // Merge the selected responses by OR-ing them and record which ports
+  // announced a data transfer
+  always_comb begin : cr_merge_comb
+    cr = '0;
+    pipe_cd_bv = '0;
+    for (int unsigned i = 0; i < CcuCfg.u.SlvPorts; i++) begin
+      if (cr_bv[i]) begin
+        cr |= snoop_cr[i];
+        pipe_cd_bv[i] = snoop_cr[i].DataTransfer;
+      end
+    end
+  end
+  // }}}
+
+  // Shareable W buffer
+  // {{{
+  stream_fifo #(
+    .FALL_THROUGH(1'b0),
+    .DEPTH (CcuCfg.u.ShareableWFifoDepth),
+    .T (w_t)
+  ) u_shareable_w_fifo (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .testmode_i(1'b0),
+    .usage_o (),
+    .data_i (ccu_shareable_req.w),
+    .valid_i (ccu_shareable_req.w_valid),
+    .ready_o (ccu_shareable_resp.w_ready),
+    .data_o (write_w),
+    .valid_o (write_w_valid),
+    .ready_i (write_w_ready)
+  );
+  // }}}
+
+  // Tracker
+  // {{{
+  // R deallocation is signalled on the last handshaked R beat
+  assign tracker_dealloc_r_resp = ccu_shareable_req.r_ready && ccu_shareable_resp.r_valid && ccu_shareable_resp.r.last;
+  assign tracker_dealloc_r_resp_id = ccu_shareable_resp.r.id;
+  assign tracker_dealloc_check_b_resp = axi_shareable_resp.b_valid;
+  assign tracker_dealloc_b_resp = ccu_shareable_req.b_ready && ccu_shareable_resp.b_valid;
+  assign tracker_dealloc_b_resp_id = ccu_shareable_resp.b.id;
+
+  ace_ccu_tracker #(
+    .CcuCfg (CcuCfg),
+    .slv_bv_t (slv_bv_t),
+    .slv_idx_t(slv_idx_t),
+    .nline_t (nline_t),
+    .ccu_id_t (ccu_id_t),
+    .tid_t (tid_t)
+  ) u_ace_ccu_tracker (
+    .clk_i,
+    .rst_ni,
+    .full_o (tracker_full),
+    .check_i (tracker_check),
+    .check_hit_o (tracker_check_hit),
+    .alloc_i (tracker_alloc),
+    .alloc_b_i (tracker_alloc_b),
+    .alloc_r_i (tracker_alloc_r),
+    .alloc_nline_i (tracker_alloc_nline),
+    .alloc_id_i (tracker_alloc_id),
+    .alloc_tid_o (tracker_alloc_tid),
+    .dealloc_rack_i (ccu_shareable_rack),
+    .dealloc_wack_i (ccu_shareable_wack),
+    .dealloc_r_resp_i (tracker_dealloc_r_resp),
+    .dealloc_r_resp_id_i (tracker_dealloc_r_resp_id),
+    .dealloc_b_resp_i (tracker_dealloc_b_resp),
+    .dealloc_check_b_resp_i(tracker_dealloc_check_b_resp),
+    .dealloc_b_resp_id_i (tracker_dealloc_b_resp_id),
+    .dealloc_b_resp_wb_o (tracker_dealloc_b_resp_wb),
+    .updt_wb_i (tracker_updt_wb),
+    .updt_wb_tid_i (tracker_updt_wb_tid)
+  );
+  // }}}
+
+
+  // Replay table
+  // {{{
+  if (CcuCfg.u.ReplayEn) begin : gen_replay
+    // finish_number must be 0, 1 or 2
+    $fatal(1, "Replay table not yet implemented.");
+  end else begin : gen_no_replay
+    // Without a replay table there is never a hit nor a replayed request
+    assign replay_full = 1'b0;
+    assign replay_hit = 1'b0;
+    assign replay_ar = '0;
+    assign replay_ar_valid = 1'b0;
+  end
+  // }}}
+
+  // Write Unit
+  // {{{
+  ace_ccu_write #(
+    .CcuCfg (CcuCfg),
+    .ccu_ax_t(ccu_ax_t),
+    .tid_t (tid_t),
+    .ccu_aw_t(ccu_axi_aw_t),
+    .w_t (w_t),
+    .ccu_b_t (ccu_axi_b_t)
+  ) u_ace_ccu_write (
+    .clk_i,
+    .rst_ni,
+    .valid_i (write_valid),
+    .ready_o (write_ready),
+    .ax_i (pipe_ax),
+    .ax_is_write_i (pipe_ax_is_write),
+    .ax_is_writeback_i (pipe_cd_ctrl_write),
+    .ax_tid_i (pipe_ax_tid),
+    .tracker_updt_wb_o (tracker_updt_wb),
+    .tracker_updt_wb_tid_o(tracker_updt_wb_tid),
+    .b_is_writeback_i (tracker_dealloc_b_resp_wb),
+    .w_i (write_w),
+    .w_valid_i (write_w_valid),
+    .w_ready_o (write_w_ready),
+    .cd_w_i (cd_w),
+    .cd_w_valid_i (cd_w_valid),
+    .cd_w_ready_o (cd_w_ready),
+    .b_o (ccu_shareable_resp.b),
+    .b_valid_o (ccu_shareable_resp.b_valid),
+    .b_ready_i (ccu_shareable_req.b_ready),
+    .aw_o (axi_shareable_req.aw),
+    .aw_valid_o (axi_shareable_req.aw_valid),
+    .aw_ready_i (axi_shareable_resp.aw_ready),
+    .w_o (axi_shareable_req.w),
+    .w_valid_o (axi_shareable_req.w_valid),
+    .w_ready_i (axi_shareable_resp.w_ready),
+    .b_i (axi_shareable_resp.b),
+    .b_valid_i (axi_shareable_resp.b_valid),
+    .b_ready_o (axi_shareable_req.b_ready)
+  );
+  // }}}
+
+  // Read Unit
+  // {{{
+  ace_ccu_read #(
+    .CcuCfg (CcuCfg),
+    .ccu_ax_t (ccu_ax_t),
+    .tid_t (tid_t),
+    .ccu_axi_ar_t(ccu_axi_ar_t),
+    .ccu_axi_r_t (ccu_axi_r_t),
+    .ccu_ace_r_t (ccu_ace_r_t)
+  ) u_ace_ccu_read_unit (
+    .clk_i,
+    .rst_ni,
+    .valid_i (read_valid),
+    .ready_o (read_ready),
+    .ax_i (pipe_ax),
+    .cd_r_i (cd_r),
+    .cd_r_valid_i(cd_r_valid),
+    .cd_r_ready_o(cd_r_ready),
+    .r_o (ccu_shareable_resp.r),
+    .r_valid_o (ccu_shareable_resp.r_valid),
+    .r_ready_i (ccu_shareable_req.r_ready),
+    .ar_o (axi_shareable_req.ar),
+    .ar_valid_o (axi_shareable_req.ar_valid),
+    .ar_ready_i (axi_shareable_resp.ar_ready),
+    .r_i (axi_shareable_resp.r),
+    .r_valid_i (axi_shareable_resp.r_valid),
+    .r_ready_o (axi_shareable_req.r_ready)
+  );
+  // }}}
+
+
+  // CD Ctrl Unit
+  // {{{
+  ace_ccu_cd_ctrl #(
+    .CcuCfg (CcuCfg),
+    .ccu_ax_t(ccu_ax_t),
+    .ccu_id_t(ccu_id_t),
+    .user_t (user_t),
+    .cd_t (snoop_cd_t),
+    .slv_bv_t(slv_bv_t),
+    .w_t (w_t),
+    .ccu_r_t (ccu_ace_r_t)
+  ) u_ace_ccu_cd_ctrl (
+    .clk_i,
+    .rst_ni,
+    .valid_i (cd_ctrl_valid),
+    .ready_o (cd_ctrl_ready),
+    .ax_i (pipe_ax),
+    .cd_ctrl_write_i(pipe_cd_ctrl_write),
+    .cd_ctrl_read_i (pipe_cd_ctrl_read),
+    .cd_bv_i (pipe_cd_bv),
+    .r_resp_shared_i(pipe_r_resp_shared),
+    .r_resp_dirty_i (pipe_r_resp_dirty),
+    .cd_i (snoop_cd),
+    .cd_valid_i (snoop_cd_valid),
+    .cd_ready_o (snoop_cd_ready),
+    .w_o (cd_w),
+    .w_valid_o (cd_w_valid),
+    .w_ready_i (cd_w_ready),
+    .r_o (cd_r),
+    .r_valid_o (cd_r_valid),
+    .r_ready_i (cd_r_ready)
+  );
+  // }}}
+
+  // Mst mux
+  // {{{
+  `ACE_TO_AXI_ASSIGN_REQ(axi_nonshareable_req, ccu_nonshareable_req)
+  `AXI_TO_ACE_ASSIGN_RESP(ccu_nonshareable_resp, axi_nonshareable_resp)
+
+  // Slave port 1 carries the non-shareable path, slave port 0 the shareable one
+  axi_mux #(
+    .SlvAxiIDWidth(CcuCfg.AxiCcuIdWidth),
+    .slv_aw_chan_t(ccu_axi_aw_t),
+    .mst_aw_chan_t(mst_aw_t),
+    .w_chan_t (w_t),
+    .slv_b_chan_t (ccu_axi_b_t),
+    .mst_b_chan_t (mst_b_t),
+    .slv_ar_chan_t(ccu_axi_ar_t),
+    .mst_ar_chan_t(mst_ar_t),
+    .slv_r_chan_t (ccu_axi_r_t),
+    .mst_r_chan_t (mst_r_t),
+    .slv_req_t (ccu_axi_req_t),
+    .slv_resp_t (ccu_axi_resp_t),
+    .mst_req_t (mst_req_t),
+    .mst_resp_t (mst_resp_t),
+    .NoSlvPorts (2),
+    .MaxWTrans (32'd8),
+    .FallThrough (1'b1),
+    .SpillAw (1'b0),
+    .SpillW (1'b0),
+    .SpillB (1'b0),
+    .SpillAr (1'b0),
+    .SpillR (1'b0)
+  ) u_axi_mst_mux (
+    .clk_i,
+    .rst_ni,
+    .test_i (1'b0),
+    .slv_reqs_i ({axi_nonshareable_req, axi_shareable_req}),
+    .slv_resps_o({axi_nonshareable_resp, axi_shareable_resp}),
+    .mst_req_o (mst_req),
+    .mst_resp_i (mst_resp)
+  );
+  // }}}
+
+  // ACE/AXI cuts
+  // {{{
+  for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin : gen_snoop_cut
+
+    // Per-port snoop request/response structs assembled from the channel-wise
+    // internal signals
+    snoop_req_t snoop_req;
+    snoop_resp_t snoop_resp;
+
+    assign snoop_req.ac_valid = snoop_ac_valid[i];
+    assign snoop_ac_ready[i] = snoop_resp.ac_ready;
+    assign snoop_req.ac = snoop_ac[i];
+
+    assign snoop_cr_valid[i] = snoop_resp.cr_valid;
+    assign snoop_req.cr_ready = snoop_cr_ready[i];
+    assign snoop_cr[i] = snoop_resp.cr_resp;
+
+    assign snoop_cd_valid[i] = snoop_resp.cd_valid;
+    assign snoop_req.cd_ready = snoop_cd_ready[i];
+    assign snoop_cd[i] = snoop_resp.cd;
+
+    ace_snoop_cut #(
+      .BypassAc (!CcuCfg.u.CutSnoopReq),
+      .BypassCr (!CcuCfg.u.CutSnoopResp),
+      .BypassCd (!CcuCfg.u.CutSnoopResp),
+      .ac_chan_t (snoop_ac_t),
+      .cd_chan_t (snoop_cd_t),
+      .cr_chan_t (snoop_cr_t),
+      .snoop_req_t (snoop_req_t),
+      .snoop_resp_t(snoop_resp_t)
+    ) u_snoop_cut (
+      .clk_i,
+      .rst_ni,
+      .slv_req_i (snoop_req),
+      .slv_resp_o(snoop_resp),
+      .mst_req_o (snoop_req_o[i]),
+      .mst_resp_i(snoop_resp_i[i])
+    );
+  end
+
+  axi_cut #(
+    .BypassAw (!CcuCfg.u.CutMstReq),
+    .BypassW (!CcuCfg.u.CutMstReq),
+    .BypassB (!CcuCfg.u.CutMstResp),
+    .BypassAr (!CcuCfg.u.CutMstReq),
+    .BypassR (!CcuCfg.u.CutMstResp),
+    .aw_chan_t (mst_aw_t),
+    .w_chan_t (w_t),
+    .b_chan_t (mst_b_t),
+    .ar_chan_t (mst_ar_t),
+    .r_chan_t (mst_r_t),
+    .axi_req_t (mst_req_t),
+    .axi_resp_t(mst_resp_t)
+  ) u_mst_cut (
+    .clk_i,
+    .rst_ni,
+    .slv_req_i (mst_req),
+    .slv_resp_o(mst_resp),
+    .mst_req_o (mst_req_o),
+    .mst_resp_i(mst_resp_i)
+  );
+  // }}}
+
+endmodule
+
+// Interface wrapper around ace_ccu_top: derives the struct types from
+// CCU_CFG and converts between the ACE/SNOOP/AXI interfaces and the
+// request/response structs of the struct-based top level.
+module ace_ccu_top_intf
+  import ace_pkg::*;
+  import ace_ccu_pkg::*;
+#(
+  parameter ace_ccu_cfg_t CCU_CFG = '{default: '0},
+  localparam type domain_bv_t = `DOMAIN_BV_T(CCU_CFG.u.SlvPorts),
+  localparam type domain_rule_t = `DOMAIN_RULE_T(domain_bv_t)
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  input domain_rule_t [CCU_CFG.u.SlvPorts-1:0] domain_rule_i,
+  ACE_BUS.Slave slv [CCU_CFG.u.SlvPorts],
+  SNOOP_BUS.Slave snoop [CCU_CFG.u.SlvPorts],
+  AXI_BUS.Master mst
+);
+
+  typedef logic [CCU_CFG.u.AxiSlvIdWidth-1:0] slv_id_t;
+  typedef logic [CCU_CFG.AxiMstIdWidth-1:0] mst_id_t;
+  typedef logic [CCU_CFG.u.AxiAddrWidth-1:0] addr_t;
+  typedef logic [CCU_CFG.u.AxiDataWidth-1:0] data_t;
+  typedef logic [CCU_CFG.u.AxiDataWidth/8-1:0] strb_t;
+  typedef logic [CCU_CFG.u.AxiUserWidth-1:0] user_t;
+
+  `ACE_TYPEDEF_AW_CHAN_T(slv_aw_t, addr_t, slv_id_t, user_t)
+  `AXI_TYPEDEF_W_CHAN_T(w_t, data_t, strb_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(slv_b_t, slv_id_t, user_t)
+  `ACE_TYPEDEF_AR_CHAN_T(slv_ar_t, addr_t, slv_id_t, user_t)
+  `ACE_TYPEDEF_R_CHAN_T(slv_r_t, data_t, slv_id_t, user_t)
+  `ACE_TYPEDEF_REQ_T(slv_req_t, slv_aw_t, w_t, slv_ar_t)
+  `ACE_TYPEDEF_RESP_T(slv_resp_t, slv_b_t, slv_r_t)
+
+  `AXI_TYPEDEF_AW_CHAN_T(mst_aw_t, addr_t, mst_id_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(mst_b_t, mst_id_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(mst_ar_t, addr_t, mst_id_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(mst_r_t, data_t, mst_id_t, user_t)
+  `AXI_TYPEDEF_REQ_T(mst_req_t, mst_aw_t, w_t, mst_ar_t)
+  `AXI_TYPEDEF_RESP_T(mst_resp_t, mst_b_t, mst_r_t)
+
+  `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t)
+  `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t)
+  `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t)
+  `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t)
+  `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t)
+
+  slv_req_t [CCU_CFG.u.SlvPorts-1:0] slv_req;
+  slv_resp_t [CCU_CFG.u.SlvPorts-1:0] slv_resp;
+
+  mst_req_t mst_req;
+  mst_resp_t mst_resp;
+
+  snoop_req_t [CCU_CFG.u.SlvPorts-1:0] snoop_req;
+  snoop_resp_t [CCU_CFG.u.SlvPorts-1:0] snoop_resp;
+
+  for (genvar i = 0; i < CCU_CFG.u.SlvPorts; i++) begin : gen_bus_assignments
+    `ACE_ASSIGN_TO_REQ(slv_req[i], slv[i])
+    `ACE_ASSIGN_FROM_RESP(slv[i], slv_resp[i])
+    `SNOOP_ASSIGN_FROM_REQ(snoop[i], snoop_req[i])
+    `SNOOP_ASSIGN_TO_RESP(snoop_resp[i], snoop[i])
+  end
+
+  `AXI_ASSIGN_FROM_REQ(mst, mst_req)
+  `AXI_ASSIGN_TO_RESP(mst_resp, mst)
+
+  ace_ccu_top #(
+    .CcuCfg (CCU_CFG),
+    .domain_rule_t(domain_rule_t),
+    .slv_ar_t (slv_ar_t),
+    .slv_aw_t (slv_aw_t),
+    .w_t (w_t),
+    .slv_b_t (slv_b_t),
+    .slv_r_t (slv_r_t),
+    .slv_req_t (slv_req_t),
+    .slv_resp_t (slv_resp_t),
+    .mst_ar_t (mst_ar_t),
+    .mst_aw_t (mst_aw_t),
+    .mst_b_t (mst_b_t),
+    .mst_r_t (mst_r_t),
+    .mst_req_t (mst_req_t),
+    .mst_resp_t (mst_resp_t),
+    .snoop_ac_t (snoop_ac_t),
+    .snoop_cr_t (snoop_cr_t),
+    .snoop_cd_t (snoop_cd_t),
+    .snoop_req_t (snoop_req_t),
+    .snoop_resp_t (snoop_resp_t)
+  ) u_ace_ccu (
+    .clk_i,
+    .rst_ni,
+    .slv_req_i (slv_req),
+    .slv_resp_o (slv_resp),
+    .domain_rule_i(domain_rule_i),
+    .snoop_req_o (snoop_req),
+    .snoop_resp_i (snoop_resp),
+    .mst_req_o (mst_req),
+    .mst_resp_i (mst_resp)
+  );
+
+endmodule
diff --git a/src/ccu/ace_ccu_tracker.sv b/src/ccu/ace_ccu_tracker.sv
new file mode 100644
index 0000000..55888d9
--- /dev/null
+++ b/src/ccu/ace_ccu_tracker.sv
@@ -0,0 +1,230 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Transaction tracker of the CCU.
+//
+// Keeps a table of CcuCfg.u.MaxTransactions entries. Each entry stores the
+// cache-line number and the CCU-side ID of one in-flight transaction together
+// with three status bits:
+//   r  - set at allocation when alloc_r_i is high, cleared by the matching RACK
+//   b  - set at allocation when alloc_b_i is high, cleared by the matching WACK
+//   wb - set through updt_wb_i/updt_wb_tid_i, cleared when
+//        dealloc_check_b_resp_i hits the entry selected by dealloc_b_resp_id_i
+// An entry is released as soon as all of its status bits are (going to be) zero.
+//
+// Ports:
+//   full_o                 - every entry is valid
+//   check_i / check_hit_o  - hit if any valid entry has the same ID or the same
+//                            cache-line number as alloc_id_i / alloc_nline_i
+//   alloc_*                - allocate the lowest free entry, returned on alloc_tid_o
+//   dealloc_rack_i/wack_i  - per-slave-port RACK/WACK strobes
+//   dealloc_{r,b}_resp_*   - R/B response seen; the matching entry index is
+//                            queued until the corresponding RACK/WACK arrives
+//   dealloc_b_resp_wb_o    - the entry matching dealloc_b_resp_id_i has wb set
+module ace_ccu_tracker
+  import ace_pkg::*;
+  import ace_ccu_pkg::*;
+#(
+  parameter ace_ccu_cfg_t CcuCfg = '{default: '0},
+  parameter type slv_bv_t = logic,
+  parameter type slv_idx_t = logic,
+  parameter type nline_t = logic,
+  parameter type ccu_id_t = logic,
+  parameter type tid_t = logic
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  output logic full_o,
+
+  // Check/alloc interface
+  // {{{
+  input logic check_i,
+  output logic check_hit_o,
+  input logic alloc_i,
+  input logic alloc_b_i,
+  input logic alloc_r_i,
+  input nline_t alloc_nline_i,
+  input ccu_id_t alloc_id_i,
+  output tid_t alloc_tid_o,
+  // }}}
+
+  // Lookup/dealloc interface
+  // {{{
+  input slv_bv_t dealloc_rack_i,
+  input slv_bv_t dealloc_wack_i,
+  input logic dealloc_r_resp_i,
+  input ccu_id_t dealloc_r_resp_id_i,
+  input logic dealloc_b_resp_i,
+  input logic dealloc_check_b_resp_i,
+  input ccu_id_t dealloc_b_resp_id_i,
+  output logic dealloc_b_resp_wb_o,
+  // }}}
+
+  input logic updt_wb_i,
+  input tid_t updt_wb_tid_i
+);
+
+  // Typedefs
+  // {{{
+  // Per-entry status bits (see module header).
+  typedef struct packed {
+    logic r;
+    logic b;
+    logic wb;
+  } meta_t;
+
+  // Per-entry payload, written once at allocation.
+  typedef struct packed {
+    nline_t nline;
+    ccu_id_t id;
+  } data_t;
+  // }}}
+
+  // Internal signals
+  // {{{
+  logic [CcuCfg.u.MaxTransactions-1:0] valid_q;
+  logic [CcuCfg.u.MaxTransactions-1:0] valid_d;
+  logic [CcuCfg.u.MaxTransactions-1:0] valid_set;
+  logic [CcuCfg.u.MaxTransactions-1:0] valid_clr;
+
+  meta_t [CcuCfg.u.MaxTransactions-1:0] meta_q;
+  meta_t [CcuCfg.u.MaxTransactions-1:0] meta_d;
+  meta_t [CcuCfg.u.MaxTransactions-1:0] meta_set;
+  meta_t [CcuCfg.u.MaxTransactions-1:0] meta_clr;
+  data_t [CcuCfg.u.MaxTransactions-1:0] data_q;
+
+  logic [CcuCfg.u.MaxTransactions-1:0] hit_id_bv;
+  logic [CcuCfg.u.MaxTransactions-1:0] hit_nline_bv;
+
+  tid_t rack_queue_wdata;
+  tid_t wack_queue_wdata;
+  // One queue head per slave port. A packed dimension must be a range:
+  // the single-expression form `[N]` is only defined for unpacked dimensions.
+  tid_t [CcuCfg.u.SlvPorts-1:0] rack_queue_rdata;
+  tid_t [CcuCfg.u.SlvPorts-1:0] wack_queue_rdata;
+  // }}}
+
+  // Alloc logic
+  // {{{
+  for (genvar i = 0; i < CcuCfg.u.MaxTransactions; i++) begin : gen_alloc
+    // alloc_tid_o falls back to 0 when no entry is free, so the entry must
+    // additionally be checked to be free; otherwise an alloc_i while full
+    // would overwrite the payload and status of live entry 0.
+    assign valid_set[i] = alloc_i && !valid_q[i] && (i == alloc_tid_o);
+    assign meta_set[i].r = valid_set[i] && alloc_r_i;
+    assign meta_set[i].b = valid_set[i] && alloc_b_i;
+  end
+
+  // Priority encoder: lowest-index free entry (0 if none is free).
+  always_comb begin : alloc_tid_comb
+    alloc_tid_o = '0;
+    for (int unsigned i = 0; i < CcuCfg.u.MaxTransactions; i++) begin
+      if (!valid_q[i]) begin
+        alloc_tid_o = CcuCfg.TransactionIdxWidth'(i);
+        break;
+      end
+    end
+  end
+  // }}}
+
+  // Dealloc logic
+  // {{{
+  for (genvar i = 0; i < CcuCfg.u.MaxTransactions; i++) begin : gen_dealloc
+    // The upper ID bits of the stored CCU ID select the slave port.
+    slv_idx_t dealloc_slv_id;
+    assign dealloc_slv_id = data_q[i].id[CcuCfg.AxiCcuIdWidth-1 : CcuCfg.u.AxiSlvIdWidth];
+    // An ack from that port clears the status bit of the entry at the head
+    // of the port's ack queue.
+    assign meta_clr[i].r = dealloc_rack_i[dealloc_slv_id] && (i == rack_queue_rdata[dealloc_slv_id]);
+    assign meta_clr[i].b = dealloc_wack_i[dealloc_slv_id] && (i == wack_queue_rdata[dealloc_slv_id]);
+    // Release the entry once no status bit remains set in the next state.
+    assign valid_clr[i] = ~|meta_d[i];
+  end
+
+
+  // Map the response IDs to entry indices (last matching valid entry wins;
+  // 0 when nothing matches).
+  always_comb begin : xack_queue_wdata_mux
+    rack_queue_wdata = '0;
+    wack_queue_wdata = '0;
+
+    for (int unsigned i = 0; i < CcuCfg.u.MaxTransactions; i++) begin
+      if (dealloc_b_resp_id_i == data_q[i].id && valid_q[i])
+        wack_queue_wdata = CcuCfg.TransactionIdxWidth'(i);
+      if (dealloc_r_resp_id_i == data_q[i].id && valid_q[i])
+        rack_queue_wdata = CcuCfg.TransactionIdxWidth'(i);
+    end
+  end
+
+  // Per slave port: queue the entry index of every response until the
+  // port's RACK/WACK pops it.
+  for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin : gen_xack_queues
+    logic wack_queue_push;
+    logic rack_queue_push;
+
+    assign wack_queue_push = dealloc_b_resp_i && data_q[wack_queue_wdata].id[CcuCfg.AxiCcuIdWidth-1 : CcuCfg.u.AxiSlvIdWidth] == CcuCfg.SlvPortIdxWidth'(i);
+    assign rack_queue_push = dealloc_r_resp_i && data_q[rack_queue_wdata].id[CcuCfg.AxiCcuIdWidth-1 : CcuCfg.u.AxiSlvIdWidth] == CcuCfg.SlvPortIdxWidth'(i);
+
+    fifo_v3 #(
+      .FALL_THROUGH(1'b0),
+      .DEPTH (CcuCfg.u.MaxTransactions),
+      .dtype (tid_t)
+    ) u_tracker_wack_queue (
+      .clk_i,
+      .rst_ni,
+      .flush_i (1'b0),
+      .testmode_i(1'b0),
+      .full_o (),
+      .empty_o (),
+      .usage_o (),
+      .data_i (wack_queue_wdata),
+      .push_i (wack_queue_push),
+      .data_o (wack_queue_rdata[i]),
+      .pop_i (dealloc_wack_i[i])
+    );
+
+    fifo_v3 #(
+      .FALL_THROUGH(1'b0),
+      .DEPTH (CcuCfg.u.MaxTransactions),
+      .dtype (tid_t)
+    ) u_tracker_rack_queue (
+      .clk_i,
+      .rst_ni,
+      .flush_i (1'b0),
+      .testmode_i(1'b0),
+      .full_o (),
+      .empty_o (),
+      .usage_o (),
+      .data_i (rack_queue_wdata),
+      .push_i (rack_queue_push),
+      .data_o (rack_queue_rdata[i]),
+      .pop_i (dealloc_rack_i[i])
+    );
+  end
+  // }}}
+
+  // State holding elements
+  // {{{
+  for (genvar i = 0; i < CcuCfg.u.MaxTransactions; i++) begin : gen_ffs
+    // Payload is only written at allocation and kept until the next one.
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        data_q[i] <= '0;
+      end else if (valid_set[i]) begin
+        data_q[i] <= '{nline: alloc_nline_i, id: alloc_id_i};
+      end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        meta_q[i] <= '0;
+        valid_q[i] <= 1'b0;
+      end else begin
+        meta_q[i] <= meta_d[i];
+        valid_q[i] <= valid_d[i];
+      end
+    end
+
+    // Set/clear registers: set only takes effect on a cleared bit, clear
+    // only on a set bit.
+    assign meta_d[i] = (meta_set[i] & ~meta_q[i]) | (~meta_clr[i] & meta_q[i]);
+    assign valid_d[i] = (valid_set[i] & ~valid_q[i]) | (~valid_clr[i] & valid_q[i]);
+  end
+  // }}}
+
+  // Check logic
+  // {{{
+  for (genvar i = 0; i < CcuCfg.u.MaxTransactions; i++) begin : gen_check
+    assign hit_id_bv[i] = valid_q[i] && (data_q[i].id == alloc_id_i);
+    assign hit_nline_bv[i] = valid_q[i] && (data_q[i].nline == alloc_nline_i);
+  end
+
+  assign check_hit_o = check_i && |{hit_id_bv, hit_nline_bv};
+  // }}}
+
+  // Writeback logic
+  // {{{
+  for (genvar i = 0; i < CcuCfg.u.MaxTransactions; i++) begin : gen_writeback
+    assign meta_set[i].wb = updt_wb_i && updt_wb_tid_i == CcuCfg.TransactionIdxWidth'(i);
+    assign meta_clr[i].wb = dealloc_check_b_resp_i && wack_queue_wdata == CcuCfg.TransactionIdxWidth'(i);
+  end
+
+  assign dealloc_b_resp_wb_o = dealloc_check_b_resp_i && meta_q[wack_queue_wdata].wb;
+  // }}}
+
+  // Global control
+  // {{{
+  assign full_o = (valid_q == '1);
+  // }}}
+
+endmodule
diff --git
a/src/ccu/ace_ccu_write.sv b/src/ccu/ace_ccu_write.sv
new file mode 100644
index 0000000..99a674d
--- /dev/null
+++ b/src/ccu/ace_ccu_write.sv
@@ -0,0 +1,243 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+`include "axi/assign.svh"
+
+// Write path of the CCU.
+//
+// For every accepted control request (valid_i/ready_o) this module issues up
+// to two AW requests on the master side:
+//   1. if ax_is_writeback_i is set, a full-cacheline WRAP burst whose W data
+//      is taken from the cd_w_* stream;
+//   2. if ax_is_write_i is set as well, the original write afterwards, whose
+//      W data is taken from the w_* stream.
+// A one-bit FIFO records, per issued AW, which W source belongs to it.
+// B responses flagged by b_is_writeback_i are fed to the drop input of the
+// B filter; all other B responses are forwarded to the slave side.
+module ace_ccu_write
+  import ace_pkg::*;
+  import ace_ccu_pkg::*;
+#(
+  parameter ace_ccu_cfg_t CcuCfg = '{default: '0},
+  parameter type ccu_ax_t = logic,
+  parameter type tid_t = logic,
+  parameter type ccu_aw_t = logic,
+  parameter type w_t = logic,
+  parameter type ccu_b_t = logic
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  // Ctrl
+  input logic valid_i,
+  output logic ready_o,
+  input ccu_ax_t ax_i,
+  input logic ax_is_write_i,
+  input logic ax_is_writeback_i,
+  input tid_t ax_tid_i,
+
+  // Pulsed when the writeback AW handshake completes; the tid identifies the
+  // request currently held in the AW sync register.
+  output logic tracker_updt_wb_o,
+  output tid_t tracker_updt_wb_tid_o,
+  input logic b_is_writeback_i,
+
+  // Slv interface
+  input w_t w_i,
+  input logic w_valid_i,
+  output logic w_ready_o,
+  input w_t cd_w_i,
+  input logic cd_w_valid_i,
+  output logic cd_w_ready_o,
+  output ccu_b_t b_o,
+  output logic b_valid_o,
+  input logic b_ready_i,
+  // Mst interface
+  output ccu_aw_t aw_o,
+  output logic aw_valid_o,
+  input logic aw_ready_i,
+  output w_t w_o,
+  output logic w_valid_o,
+  input logic w_ready_i,
+  input ccu_b_t b_i,
+  input logic b_valid_i,
+  output logic b_ready_o
+);
+  // Typedefs
+  // {{{
+  // Everything captured from the control interface for one request.
+  typedef struct packed {
+    ccu_ax_t ax;
+    logic ax_is_write;
+    logic ax_is_writeback;
+    tid_t ax_tid;
+  } aw_sync_reg_t;
+  // }}}
+
+  // Internal signals
+  // {{{
+  aw_sync_reg_t aw_sync_wdata;
+  aw_sync_reg_t aw_sync_rdata;
+  logic aw_sync_valid;
+  logic aw_sync_ready;
+  logic aw_sync_gate;
+  logic aw_is_writeback;
+  logic aw_writeback_done_d;
+  logic aw_writeback_done_q;
+
+  logic w_ctrl_fifo_valid_in;
+  logic w_ctrl_fifo_ready_in;
+  logic w_ctrl_fifo_valid_out;
+  logic w_ctrl_fifo_ready_out;
+  logic w_mux_valid_out;
+  logic w_mux_ready_out;
+  logic w_is_write_back;
+  // }}}
+
+  // AW channel
+  // {{{
+
+  // Decouple AW handling from snoop pipe
+  assign aw_sync_wdata = '{
+    ax: ax_i,
+    ax_is_write: ax_is_write_i,
+    ax_is_writeback: ax_is_writeback_i,
+    ax_tid: ax_tid_i
+  };
+
+  // The register is only popped when the downstream handshake completes and
+  // the FSM below does not gate it (i.e. no second AW is still to be sent
+  // for the same request).
+  fall_through_register #(
+    .T(aw_sync_reg_t)
+  ) u_aw_sync_reg (
+    .clk_i,
+    .rst_ni,
+    .clr_i (1'b0),
+    .testmode_i(1'b0),
+    .valid_i (valid_i),
+    .ready_o (ready_o),
+    .data_i (aw_sync_wdata),
+    .valid_o (aw_sync_valid),
+    .ready_i (aw_sync_ready && !aw_sync_gate),
+    .data_o (aw_sync_rdata)
+  );
+
+  assign tracker_updt_wb_tid_o = aw_sync_rdata.ax_tid;
+
+  // Two-state sequencer (aw_writeback_done_q):
+  //   0: if the held request needs a writeback, present it as a writeback AW;
+  //      on its handshake notify the tracker and, if a write is pending too,
+  //      keep the request in the register (gate) and move to state 1.
+  //   1: present the same request as a plain write; return to 0 on handshake.
+  always_comb begin : aw_writeback_fsm_comb
+    aw_writeback_done_d = aw_writeback_done_q;
+    aw_is_writeback = 1'b0;
+    aw_sync_gate = 1'b0;
+
+    tracker_updt_wb_o = 1'b0;
+
+    if (!aw_writeback_done_q) begin
+      if (aw_sync_rdata.ax_is_writeback) begin
+        // A writeback is pending
+        aw_is_writeback = 1'b1;
+        if (aw_sync_valid && aw_sync_ready) begin
+          // The writeback request is done
+          tracker_updt_wb_o = 1'b1;
+          if (aw_sync_rdata.ax_is_write) begin
+            // A write is also pending
+            aw_writeback_done_d = 1'b1;
+            aw_sync_gate = 1'b1;
+          end
+        end
+      end
+    end else begin
+      // Send the pending write after the writeback
+      if (aw_sync_valid && aw_sync_ready) begin
+        aw_writeback_done_d = 1'b0;
+      end
+    end
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      aw_writeback_done_q <= 1'b0;
+    end else begin
+      aw_writeback_done_q <= aw_writeback_done_d;
+    end
+  end
+
+  // Build the outgoing AW: copy of the held request, with the burst
+  // attributes overridden when it is sent as a writeback.
+  always_comb begin : aw_mux_comb
+    aw_o = '0;
+
+    `AXI_SET_AW_STRUCT(aw_o, aw_sync_rdata.ax)
+
+    if (aw_is_writeback) begin
+      // Pass a full cacheline
+      aw_o.addr = axi_pkg::aligned_addr(aw_sync_rdata.ax.addr, CcuCfg.CachelineBytesIdxWidth);
+      aw_o.len = CcuCfg.CachelineAxiTransfers - 1;
+      aw_o.size = CcuCfg.AxiDataBytesIdxWidth;
+      // Burst type for write backs
+      aw_o.burst = axi_pkg::BURST_WRAP;
+      // The write back is not atomic
+      aw_o.lock = 1'b0;
+      aw_o.atop = '0;
+    end
+  end
+
+  // Each AW goes both to the master port and to the W control FIFO.
+  stream_fork #(
+    .N_OUP(2)
+  ) u_aw_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i(aw_sync_valid),
+    .ready_o(aw_sync_ready),
+    .valid_o({aw_valid_o, w_ctrl_fifo_valid_in}),
+    .ready_i({aw_ready_i, w_ctrl_fifo_ready_in})
+  );
+  // }}}
+
+
+  // W channel
+  // {{{
+  // One entry per issued AW: 1 = writeback burst, 0 = regular write.
+  // The entry is consumed together with the last W beat of its burst.
+  stream_fifo #(
+    .FALL_THROUGH(1'b1),
+    .DATA_WIDTH (1),
+    .DEPTH (2)
+  ) u_w_ctrl_fifo (
+    .clk_i (clk_i),
+    .rst_ni (rst_ni),
+    .flush_i (1'b0),
+    .testmode_i(1'b0),
+    .usage_o (),
+    .data_i (aw_is_writeback),
+    .valid_i (w_ctrl_fifo_valid_in),
+    .ready_o (w_ctrl_fifo_ready_in),
+    .data_o (w_is_write_back),
+    .valid_o (w_ctrl_fifo_valid_out),
+    .ready_i (w_ctrl_fifo_ready_out && w_o.last)
+  );
+
+  // Input 0 is the regular W stream, input 1 the snoop-data (CD) W stream.
+  stream_mux #(
+    .DATA_T(w_t),
+    .N_INP (2)
+  ) u_w_mux (
+    .inp_data_i ({cd_w_i, w_i}),
+    .inp_valid_i({cd_w_valid_i, w_valid_i}),
+    .inp_ready_o({cd_w_ready_o, w_ready_o}),
+    .inp_sel_i (w_is_write_back),
+    .oup_data_o (w_o),
+    .oup_valid_o(w_mux_valid_out),
+    .oup_ready_i(w_mux_ready_out)
+  );
+
+  // A W beat is only forwarded while a control entry is available.
+  stream_join #(
+    .N_INP(2)
+  ) u_w_join (
+    .inp_valid_i({w_ctrl_fifo_valid_out, w_mux_valid_out}),
+    .inp_ready_o({w_ctrl_fifo_ready_out, w_mux_ready_out}),
+    .oup_valid_o(w_valid_o),
+    .oup_ready_i(w_ready_i)
+  );
+  // }}}
+
+  // B channel
+  // {{{
+  // NOTE(review): presumably stream_filter consumes the beat without
+  // forwarding it while drop_i is high - confirm against common_cells.
+  stream_filter u_b_filter (
+    .valid_i(b_valid_i),
+    .ready_o(b_ready_o),
+    .drop_i (b_is_writeback_i),
+    .valid_o(b_valid_o),
+    .ready_i(b_ready_i)
+  );
+
+  `AXI_ASSIGN_B_STRUCT(b_o, b_i)
+  // }}}
+
+endmodule
diff --git a/src/ccu_ctrl.sv
b/src/ccu/deprecated/ccu_ctrl.sv similarity index 100% rename from src/ccu_ctrl.sv rename to src/ccu/deprecated/ccu_ctrl.sv diff --git a/src/ccu_ctrl_decoder.sv b/src/ccu/deprecated/ccu_ctrl_decoder.sv similarity index 98% rename from src/ccu_ctrl_decoder.sv rename to src/ccu/deprecated/ccu_ctrl_decoder.sv index f26dffd..05fad39 100644 --- a/src/ccu_ctrl_decoder.sv +++ b/src/ccu/deprecated/ccu_ctrl_decoder.sv @@ -113,19 +113,19 @@ module ccu_ctrl_decoder import ccu_ctrl_pkg::*; logic send_invalid_r; logic collision; - assign send_invalid_r = ar_holder.snoop == snoop_pkg::CLEAN_UNIQUE || ar_holder.lock; + assign send_invalid_r = ar_holder.snoop == snoop_pkg::CleanUnique || ar_holder.lock; assign collision = b_collision_i || r_collision_i; always_comb begin aw_ac = '0; aw_ac.addr = aw_holder.addr; aw_ac.prot = aw_holder.prot; - aw_ac.snoop = snoop_pkg::CLEAN_INVALID; + aw_ac.snoop = snoop_pkg::CleanInvalid; ar_ac = '0; ar_ac.addr = ar_holder.addr; ar_ac.prot = ar_holder.prot; - ar_ac.snoop = send_invalid_r ? snoop_pkg::CLEAN_INVALID : ar_holder.snoop; + ar_ac.snoop = send_invalid_r ? 
snoop_pkg::CleanInvalid : ar_holder.snoop; end spill_register #( diff --git a/src/ccu_ctrl_memory_unit.sv b/src/ccu/deprecated/ccu_ctrl_memory_unit.sv similarity index 100% rename from src/ccu_ctrl_memory_unit.sv rename to src/ccu/deprecated/ccu_ctrl_memory_unit.sv diff --git a/src/ccu_ctrl_pkg.sv b/src/ccu/deprecated/ccu_ctrl_pkg.sv similarity index 100% rename from src/ccu_ctrl_pkg.sv rename to src/ccu/deprecated/ccu_ctrl_pkg.sv diff --git a/src/ccu_ctrl_snoop_unit.sv b/src/ccu/deprecated/ccu_ctrl_snoop_unit.sv similarity index 100% rename from src/ccu_ctrl_snoop_unit.sv rename to src/ccu/deprecated/ccu_ctrl_snoop_unit.sv diff --git a/src/deprecated/ace_sim_master.sv b/src/deprecated/ace_sim_master.sv new file mode 100644 index 0000000..16f7409 --- /dev/null +++ b/src/deprecated/ace_sim_master.sv @@ -0,0 +1,1543 @@ +package ace_sim_master; + +import axi_test::*; + +typedef enum logic [3:0] { + AR_READ_NO_SNOOP, + AR_READ_ONCE, + AR_READ_SHARED, + AR_READ_CLEAN, + AR_READ_NOT_SHARED_DIRTY, + AR_READ_UNIQUE, + AR_CLEAN_UNIQUE, + AR_MAKE_UNIQUE, + AR_CLEAN_SHARED, + AR_CLEAN_INVALID, + AR_MAKE_INVALID, + AR_BARRIER, + AR_DVM_COMPLETE, + AR_DVM_MESSAGE +} ar_snoop_e; + +ar_snoop_e ar_unsupported_ops[] = '{AR_READ_NO_SNOOP, AR_BARRIER, AR_DVM_COMPLETE, AR_DVM_MESSAGE}; + +typedef enum logic [2:0] { + AW_WRITE_NO_SNOOP, + AW_WRITE_UNIQUE, + AW_WRITE_LINE_UNIQUE, + AW_WRITE_CLEAN, + AW_WRITE_BACK, + AW_EVICT, + AW_WRITE_EVICT, + AW_BARRIER +} aw_snoop_e; + +aw_snoop_e aw_unsupported_ops[] = '{AW_WRITE_NO_SNOOP, AW_BARRIER}; + +/// The data transferred on a beat on the AW/AR channels. 
+// Parameters: AW = address width, IW = ID width, UW = user-signal width.
+// Fields marked `rand` take part in randomization; all fields default to 0.
+class ace_ax_beat #(
+  parameter AW = 32,
+  parameter IW = 8 ,
+  parameter UW = 1
+);
+  rand logic [IW-1:0] ax_id = '0;
+  rand logic [AW-1:0] ax_addr = '0;
+  logic [7:0] ax_len = '0;
+  logic [2:0] ax_size = '0;
+  logic [1:0] ax_burst = '0;
+  logic ax_lock = '0;
+  logic [3:0] ax_cache = '0;
+  logic [2:0] ax_prot = '0;
+  rand logic [3:0] ax_qos = '0;
+  logic [3:0] ax_region = '0;
+  logic [5:0] ax_atop = '0; // Only defined on the AW channel.
+  rand logic [UW-1:0] ax_user = '0;
+  rand logic [3:0] ax_snoop = '0; // AW channel requires 3 bits, AR channel requires 4 bits
+  rand logic [1:0] ax_bar = '0;
+  rand logic [1:0] ax_domain = '0;
+  rand logic ax_awunique = '0; // Only for AW
+endclass
+
+/// The data transferred on a beat on the R channel.
+/// Parameters: DW = data width, IW = ID width, UW = user-signal width.
+class ace_r_beat #(
+  parameter DW = 32,
+  parameter IW = 8 ,
+  parameter UW = 1
+);
+  rand logic [IW-1:0] r_id = '0;
+  rand logic [DW-1:0] r_data = '0;
+  ace_pkg::rresp_t r_resp = '0;
+  logic r_last = '0;
+  rand logic [UW-1:0] r_user = '0;
+endclass
+
+/// The data transferred on a beat on the AC channel.
+/// Plus an extra signal to determine data transfer
+class ace_ac_beat #(
+  parameter AW = 32
+);
+  logic [AW-1:0] ac_addr = '0;
+  logic [3:0] ac_snoop = '0;
+  logic [2:0] ac_prot = '0;
+  logic data_transfer = '0;
+endclass
+
+/// The data transferred on a beat on the CD channel.
+class ace_cd_beat #(
+  parameter DW = 32
+);
+  rand logic [DW-1:0] cd_data = '0;
+  // Initialised like every other beat field: without an initialiser a
+  // freshly constructed beat carries X here.
+  logic cd_last = '0;
+endclass
+
+/// The data transferred on a beat on the CR channel.
+class ace_cr_beat;
+  ace_pkg::crresp_t cr_resp = '0;
+endclass
+
+// Signal-level driver/monitor for one ACE port plus its snoop port.
+//
+// Three families of tasks are provided per channel:
+//   send_*  - drive one beat as the channel's source and wait for `ready`
+//   recv_*  - act as the channel's sink: raise `ready`, wait for `valid`
+//             and return the beat
+//   mon_*   - passive: wait until `valid && ready` and return the beat
+// Signals are driven TA after the clock edge and sampled TT after it.
+class ace_driver #(
+  parameter int AW = 32,
+  parameter int DW = 32,
+  parameter int AC_AW = AW,
+  parameter int CD_DW = DW,
+  parameter int IW = 8,
+  parameter int UW = 1,
+  parameter time TA = 0ns, // stimuli application time
+  parameter time TT = 0ns // stimuli test time
+);
+  virtual ACE_BUS_DV #(
+    .AXI_ADDR_WIDTH (AW),
+    .AXI_DATA_WIDTH (DW),
+    .AXI_ID_WIDTH (IW),
+    .AXI_USER_WIDTH (UW)
+  ) ace;
+
+  virtual SNOOP_BUS_DV #(
+    .SNOOP_ADDR_WIDTH (AC_AW),
+    .SNOOP_DATA_WIDTH (CD_DW)
+  ) snoop;
+
+  typedef ace_ax_beat #(.AW(AW), .IW(IW), .UW(UW)) ax_beat_t;
+  typedef axi_w_beat #(.DW(DW), .UW(UW)) w_beat_t;
+  typedef axi_b_beat #(.IW(IW), .UW(UW)) b_beat_t;
+  typedef ace_r_beat #(.DW(DW), .IW(IW), .UW(UW)) r_beat_t;
+  typedef ace_ac_beat #(.AW(AC_AW)) ac_beat_t;
+  typedef ace_cd_beat #(.DW(CD_DW)) cd_beat_t;
+  typedef ace_cr_beat cr_beat_t;
+
+  // Bind the driver to the two virtual interfaces.
+  function new (
+    virtual ACE_BUS_DV #(
+      .AXI_ADDR_WIDTH (AW),
+      .AXI_DATA_WIDTH (DW),
+      .AXI_ID_WIDTH (IW),
+      .AXI_USER_WIDTH (UW)
+    ) ace,
+    virtual SNOOP_BUS_DV #(
+      .SNOOP_ADDR_WIDTH (AC_AW),
+      .SNOOP_DATA_WIDTH (CD_DW)
+    ) snoop
+  );
+    this.ace = ace;
+    this.snoop = snoop;
+  endfunction
+
+  // Zero every signal this driver drives when used as a master:
+  // AW/W/AR payload and valid, B/R ready, WACK/RACK, and on the snoop port
+  // AC ready plus the CR/CD payload and valid.
+  function void reset_master();
+    ace.aw_id <= '0;
+    ace.aw_addr <= '0;
+    ace.aw_len <= '0;
+    ace.aw_size <= '0;
+    ace.aw_burst <= '0;
+    ace.aw_lock <= '0;
+    ace.aw_cache <= '0;
+    ace.aw_prot <= '0;
+    ace.aw_qos <= '0;
+    ace.aw_region <= '0;
+    ace.aw_atop <= '0;
+    ace.aw_user <= '0;
+    ace.aw_valid <= '0;
+    ace.aw_snoop <= '0;
+    ace.aw_bar <= '0;
+    ace.aw_domain <= '0;
+    ace.aw_awunique <= '0;
+    ace.w_data <= '0;
+    ace.w_strb <= '0;
+    ace.w_last <= '0;
+    ace.w_user <= '0;
+    ace.w_valid <= '0;
+    ace.b_ready <= '0;
+    ace.ar_id <= '0;
+    ace.ar_addr <= '0;
+    ace.ar_len <= '0;
+    ace.ar_size <= '0;
+    ace.ar_burst <= '0;
+    ace.ar_lock <= '0;
+    ace.ar_cache <= '0;
+    ace.ar_prot <= '0;
+    ace.ar_qos <= '0;
+    ace.ar_region <= '0;
+    ace.ar_user <= '0;
+    ace.ar_snoop <= '0;
+    ace.ar_bar <= '0;
+    ace.ar_domain <= '0;
+    ace.ar_valid <= '0;
+    ace.r_ready <= '0;
+    ace.wack <= '0;
+    ace.rack <= '0;
+    snoop.ac_ready <= '0;
+    snoop.cr_valid <= '0;
+    snoop.cr_resp <= '0;
+    snoop.cd_valid <= '0;
+    snoop.cd_data <= '0;
+    snoop.cd_last <= '0;
+  endfunction
+
+  // Zero every signal this driver drives when used as a slave:
+  // AW/W/AR ready, B/R payload and valid, and on the snoop port the AC
+  // payload and valid plus CR/CD ready.
+  function void reset_slave();
+    ace.aw_ready <= '0;
+    ace.w_ready <= '0;
+    ace.b_id <= '0;
+    ace.b_resp <= '0;
+    ace.b_user <= '0;
+    ace.b_valid <= '0;
+    ace.ar_ready <= '0;
+    ace.r_id <= '0;
+    ace.r_data <= '0;
+    ace.r_resp <= '0;
+    ace.r_last <= '0;
+    ace.r_user <= '0;
+    ace.r_valid <= '0;
+    snoop.ac_valid <= '0;
+    snoop.ac_addr <= '0;
+    snoop.ac_prot <= '0;
+    snoop.ac_snoop <= '0;
+    snoop.cr_ready <= '0;
+    snoop.cd_ready <= '0;
+  endfunction
+
+  // Advance to the sampling point of the current cycle (TT after the edge).
+  task cycle_start;
+    #TT;
+  endtask
+
+  // Wait for the next rising clock edge.
+  task cycle_end;
+    @(posedge ace.clk_i);
+  endtask
+
+  /// Issue a beat on the AW channel.
+  // Drives the payload with valid, holds it until aw_ready is sampled high,
+  // then returns all AW signals to zero after the following clock edge.
+  task send_aw (
+    input ax_beat_t beat
+  );
+    ace.aw_id <= #TA beat.ax_id;
+    ace.aw_addr <= #TA beat.ax_addr;
+    ace.aw_len <= #TA beat.ax_len;
+    ace.aw_size <= #TA beat.ax_size;
+    ace.aw_burst <= #TA beat.ax_burst;
+    ace.aw_lock <= #TA beat.ax_lock;
+    ace.aw_cache <= #TA beat.ax_cache;
+    ace.aw_prot <= #TA beat.ax_prot;
+    ace.aw_qos <= #TA beat.ax_qos;
+    ace.aw_region <= #TA beat.ax_region;
+    ace.aw_atop <= #TA beat.ax_atop;
+    ace.aw_user <= #TA beat.ax_user;
+    ace.aw_valid <= #TA 1;
+    ace.aw_snoop <= #TA beat.ax_snoop;
+    ace.aw_bar <= #TA beat.ax_bar;
+    ace.aw_domain <= #TA beat.ax_domain;
+    ace.aw_awunique <= #TA beat.ax_awunique;
+    cycle_start();
+    while (ace.aw_ready != 1) begin cycle_end(); cycle_start(); end
+    cycle_end();
+    ace.aw_id <= #TA '0;
+    ace.aw_addr <= #TA '0;
+    ace.aw_len <= #TA '0;
+    ace.aw_size <= #TA '0;
+    ace.aw_burst <= #TA '0;
+    ace.aw_lock <= #TA '0;
+    ace.aw_cache <= #TA '0;
+    ace.aw_prot <= #TA '0;
+    ace.aw_qos <= #TA '0;
+    ace.aw_region <= #TA '0;
+    ace.aw_atop <= #TA '0;
+    ace.aw_user <= #TA '0;
+    ace.aw_valid <= #TA 0;
+    ace.aw_snoop <= #TA '0;
+    ace.aw_bar <= #TA '0;
+    ace.aw_domain <= #TA '0;
+    ace.aw_awunique <= #TA 0;
+  endtask
+
+  /// Issue a beat on the W channel.
+  task send_w (
+    input w_beat_t beat
+  );
+    ace.w_data <= #TA beat.w_data;
+    ace.w_strb <= #TA beat.w_strb;
+    ace.w_last <= #TA beat.w_last;
+    ace.w_user <= #TA beat.w_user;
+    ace.w_valid <= #TA 1;
+    cycle_start();
+    while (ace.w_ready != 1) begin cycle_end(); cycle_start(); end
+    cycle_end();
+    ace.w_data <= #TA '0;
+    ace.w_strb <= #TA '0;
+    ace.w_last <= #TA '0;
+    ace.w_user <= #TA '0;
+    ace.w_valid <= #TA 0;
+  endtask
+
+  /// Issue a beat on the B channel.
+  // After the B handshake the task additionally blocks until WACK is
+  // sampled high.
+  task send_b (
+    input b_beat_t beat
+  );
+    ace.b_id <= #TA beat.b_id;
+    ace.b_resp <= #TA beat.b_resp;
+    ace.b_user <= #TA beat.b_user;
+    ace.b_valid <= #TA 1;
+    cycle_start();
+    while (ace.b_ready != 1) begin cycle_end(); cycle_start(); end
+    cycle_end();
+    ace.b_id <= #TA '0;
+    ace.b_resp <= #TA '0;
+    ace.b_user <= #TA '0;
+    ace.b_valid <= #TA 0;
+    cycle_start();
+    while (ace.wack != 1) begin cycle_end(); cycle_start(); end
+    cycle_end();
+  endtask
+
+  /// Issue a beat on the AR channel.
+  task send_ar (
+    input ax_beat_t beat
+  );
+    ace.ar_id <= #TA beat.ax_id;
+    ace.ar_addr <= #TA beat.ax_addr;
+    ace.ar_len <= #TA beat.ax_len;
+    ace.ar_size <= #TA beat.ax_size;
+    ace.ar_burst <= #TA beat.ax_burst;
+    ace.ar_lock <= #TA beat.ax_lock;
+    ace.ar_cache <= #TA beat.ax_cache;
+    ace.ar_prot <= #TA beat.ax_prot;
+    ace.ar_qos <= #TA beat.ax_qos;
+    ace.ar_region <= #TA beat.ax_region;
+    ace.ar_user <= #TA beat.ax_user;
+    ace.ar_valid <= #TA 1;
+    ace.ar_snoop <= #TA beat.ax_snoop;
+    ace.ar_bar <= #TA beat.ax_bar;
+    ace.ar_domain <= #TA beat.ax_domain;
+    cycle_start();
+    while (ace.ar_ready != 1) begin cycle_end(); cycle_start(); end
+    cycle_end();
+    ace.ar_id <= #TA '0;
+    ace.ar_addr <= #TA '0;
+    ace.ar_len <= #TA '0;
+    ace.ar_size <= #TA '0;
+    ace.ar_burst <= #TA '0;
+    ace.ar_lock <= #TA '0;
+    ace.ar_cache <= #TA '0;
+    ace.ar_prot <= #TA '0;
+    ace.ar_qos <= #TA '0;
+    ace.ar_region <= #TA '0;
+    ace.ar_user <= #TA '0;
+    ace.ar_valid <= #TA 0;
+    ace.ar_snoop <= #TA '0;
+    ace.ar_bar <= #TA '0;
+    ace.ar_domain <= #TA '0;
+  endtask
+
+  /// Issue a beat on the R channel.
+  // After the R handshake the task additionally blocks until RACK is
+  // sampled high (for every beat, regardless of r_last).
+  task send_r (
+    input r_beat_t beat
+  );
+    ace.r_id <= #TA beat.r_id;
+    ace.r_data <= #TA beat.r_data;
+    ace.r_resp <= #TA beat.r_resp;
+    ace.r_last <= #TA beat.r_last;
+    ace.r_user <= #TA beat.r_user;
+    ace.r_valid <= #TA 1;
+    cycle_start();
+    while (ace.r_ready != 1) begin cycle_end(); cycle_start(); end
+    cycle_end();
+    ace.r_id <= #TA '0;
+    ace.r_data <= #TA '0;
+    ace.r_resp <= #TA '0;
+    ace.r_last <= #TA '0;
+    ace.r_user <= #TA '0;
+    ace.r_valid <= #TA 0;
+    cycle_start();
+    while (ace.rack != 1) begin cycle_end(); cycle_start(); end
+    cycle_end();
+  endtask
+
+  /// Wait for a beat on the AW channel.
+  task recv_aw (
+    output ax_beat_t beat
+  );
+    ace.aw_ready <= #TA 1;
+    cycle_start();
+    while (ace.aw_valid != 1) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.ax_id = ace.aw_id;
+    beat.ax_addr = ace.aw_addr;
+    beat.ax_len = ace.aw_len;
+    beat.ax_size = ace.aw_size;
+    beat.ax_burst = ace.aw_burst;
+    beat.ax_lock = ace.aw_lock;
+    beat.ax_cache = ace.aw_cache;
+    beat.ax_prot = ace.aw_prot;
+    beat.ax_qos = ace.aw_qos;
+    beat.ax_region = ace.aw_region;
+    beat.ax_atop = ace.aw_atop;
+    beat.ax_user = ace.aw_user;
+    beat.ax_snoop = ace.aw_snoop;
+    beat.ax_bar = ace.aw_bar;
+    beat.ax_domain = ace.aw_domain;
+    beat.ax_awunique = ace.aw_awunique;
+    cycle_end();
+    ace.aw_ready <= #TA 0;
+  endtask
+
+  /// Wait for a beat on the W channel.
+  task recv_w (
+    output w_beat_t beat
+  );
+    ace.w_ready <= #TA 1;
+    cycle_start();
+    while (ace.w_valid != 1) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.w_data = ace.w_data;
+    beat.w_strb = ace.w_strb;
+    beat.w_last = ace.w_last;
+    beat.w_user = ace.w_user;
+    cycle_end();
+    ace.w_ready <= #TA 0;
+  endtask
+
+  /// Wait for a beat on the B channel.
+  // WACK is raised after the handshake and scheduled low again one
+  // cycle_start() (TT) later.
+  task recv_b (
+    output b_beat_t beat
+  );
+    ace.b_ready <= #TA 1;
+    cycle_start();
+    while (ace.b_valid != 1) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.b_id = ace.b_id;
+    beat.b_resp = ace.b_resp;
+    beat.b_user = ace.b_user;
+    cycle_end();
+    ace.b_ready <= #TA 0;
+    ace.wack <= #TA 1;
+    cycle_start();
+    ace.wack <= #TA 0;
+  endtask
+
+  /// Wait for a beat on the AR channel.
+  task recv_ar (
+    output ax_beat_t beat
+  );
+    ace.ar_ready <= #TA 1;
+    cycle_start();
+    while (ace.ar_valid != 1) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.ax_id = ace.ar_id;
+    beat.ax_addr = ace.ar_addr;
+    beat.ax_len = ace.ar_len;
+    beat.ax_size = ace.ar_size;
+    beat.ax_burst = ace.ar_burst;
+    beat.ax_lock = ace.ar_lock;
+    beat.ax_cache = ace.ar_cache;
+    beat.ax_prot = ace.ar_prot;
+    beat.ax_qos = ace.ar_qos;
+    beat.ax_region = ace.ar_region;
+    beat.ax_atop = 'X; // Not defined on the AR channel.
+    beat.ax_user = ace.ar_user;
+    beat.ax_snoop = ace.ar_snoop;
+    beat.ax_bar = ace.ar_bar;
+    beat.ax_domain = ace.ar_domain;
+    cycle_end();
+    ace.ar_ready <= #TA 0;
+  endtask
+
+  /// Wait for a beat on the R channel.
+  // RACK is driven with the value of r_last read from the interface after
+  // the handshake edge, and scheduled low again one cycle_start() later.
+  task recv_r (
+    output r_beat_t beat
+  );
+    ace.r_ready <= #TA 1;
+    cycle_start();
+    while (ace.r_valid != 1) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.r_id = ace.r_id;
+    beat.r_data = ace.r_data;
+    beat.r_resp = ace.r_resp;
+    beat.r_last = ace.r_last;
+    beat.r_user = ace.r_user;
+    cycle_end();
+    ace.r_ready <= #TA 0;
+    ace.rack <= #TA ace.r_last;
+    cycle_start();
+    ace.rack <= #TA 0;
+  endtask
+
+  /// Issue a beat on the AC channel.
+  task send_ac (
+    input ac_beat_t beat
+  );
+    snoop.ac_valid <= #TA 1;
+    snoop.ac_addr <= #TA beat.ac_addr;
+    snoop.ac_snoop <= #TA beat.ac_snoop;
+    snoop.ac_prot <= #TA beat.ac_prot;
+    cycle_start();
+    while (snoop.ac_ready != 1) begin cycle_end(); cycle_start(); end
+    cycle_end();
+    snoop.ac_valid <= #TA '0;
+    snoop.ac_addr <= #TA '0;
+    snoop.ac_snoop <= #TA '0;
+    snoop.ac_prot <= #TA '0;
+  endtask
+
+  /// Issue a beat on the CR channel.
+  task send_cr (
+    input cr_beat_t beat
+  );
+    snoop.cr_valid <= #TA 1;
+    snoop.cr_resp <= #TA beat.cr_resp;
+    cycle_start();
+    while (snoop.cr_ready != 1) begin cycle_end(); cycle_start(); end
+    cycle_end();
+    snoop.cr_valid <= #TA '0;
+    snoop.cr_resp <= #TA '0;
+  endtask
+
+  /// Issue a beat on the CD channel.
+  task send_cd (
+    input cd_beat_t beat
+  );
+    snoop.cd_valid <= #TA 1;
+    snoop.cd_data <= #TA beat.cd_data;
+    snoop.cd_last <= #TA beat.cd_last;
+    cycle_start();
+    while (snoop.cd_ready != 1) begin cycle_end(); cycle_start(); end
+    cycle_end();
+    snoop.cd_valid <= #TA '0;
+    snoop.cd_data <= #TA '0;
+    snoop.cd_last <= #TA '0;
+  endtask
+
+  /// Wait for a beat on the AC channel.
+  // The wait is abandoned as soon as sim_done is set; in that case `beat`
+  // is not assigned and ac_ready is left asserted.
+  task recv_ac (
+    output ac_beat_t beat,
+    ref logic sim_done
+  );
+    snoop.ac_ready <= #TA 1;
+    cycle_start();
+    while ((snoop.ac_valid != 1) && !sim_done) begin
+      cycle_end(); cycle_start();
+    end
+    if (!sim_done) begin
+      beat = new;
+      beat.ac_addr = snoop.ac_addr;
+      beat.ac_snoop = snoop.ac_snoop;
+      beat.ac_prot = snoop.ac_prot;
+      cycle_end();
+      snoop.ac_ready <= #TA 0;
+    end
+  endtask
+
+  /// Wait for a beat on the CR channel.
+  task recv_cr (
+    output cr_beat_t beat
+  );
+    snoop.cr_ready <= #TA 1;
+    cycle_start();
+    while (snoop.cr_valid != 1) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.cr_resp = snoop.cr_resp;
+    cycle_end();
+    snoop.cr_ready <= #TA 0;
+  endtask
+
+  /// Wait for a beat on the CD channel.
+  // Consumes beats until one with cd_last set has been accepted. A single
+  // beat object is reused, so only the data of the final beat is returned.
+  task recv_cd (
+    output cd_beat_t beat
+  );
+    beat = new;
+    beat.cd_last = '0;
+    while (!beat.cd_last) begin
+      snoop.cd_ready <= #TA 1;
+      cycle_start();
+      while (snoop.cd_valid != 1) begin cycle_end(); cycle_start(); end
+      beat.cd_data = snoop.cd_data;
+      beat.cd_last = snoop.cd_last;
+      cycle_end();
+      snoop.cd_ready <= #TA 0;
+    end
+  endtask
+
+  /// Monitor the AC channel and return the next beat.
+  task mon_ac (
+    output ac_beat_t beat
+  );
+    cycle_start();
+    while (!(snoop.ac_valid && snoop.ac_ready)) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.ac_addr = snoop.ac_addr;
+    beat.ac_snoop = snoop.ac_snoop;
+    beat.ac_prot = snoop.ac_prot;
+    cycle_end();
+  endtask
+
+  /// Monitor the CR channel and return the next beat.
+  task mon_cr (
+    output cr_beat_t beat
+  );
+    cycle_start();
+    while (!(snoop.cr_valid && snoop.cr_ready)) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.cr_resp = snoop.cr_resp;
+    cycle_end();
+  endtask
+
+  /// Monitor the CD channel and return the next beat.
+  task mon_cd (
+    output cd_beat_t beat
+  );
+    cycle_start();
+    while (!(snoop.cd_valid && snoop.cd_ready)) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.cd_data = snoop.cd_data;
+    beat.cd_last = snoop.cd_last;
+    cycle_end();
+  endtask
+
+  /// Monitor the AW channel and return the next beat.
+  task mon_aw (
+    output ax_beat_t beat
+  );
+    cycle_start();
+    while (!(ace.aw_valid && ace.aw_ready)) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.ax_id = ace.aw_id;
+    beat.ax_addr = ace.aw_addr;
+    beat.ax_len = ace.aw_len;
+    beat.ax_size = ace.aw_size;
+    beat.ax_burst = ace.aw_burst;
+    beat.ax_lock = ace.aw_lock;
+    beat.ax_cache = ace.aw_cache;
+    beat.ax_prot = ace.aw_prot;
+    beat.ax_qos = ace.aw_qos;
+    beat.ax_region = ace.aw_region;
+    beat.ax_atop = ace.aw_atop;
+    beat.ax_user = ace.aw_user;
+    beat.ax_snoop = ace.aw_snoop;
+    beat.ax_bar = ace.aw_bar;
+    beat.ax_domain = ace.aw_domain;
+    beat.ax_awunique = ace.aw_awunique;
+    cycle_end();
+  endtask
+
+  /// Monitor the W channel and return the next beat.
+  task mon_w (
+    output w_beat_t beat
+  );
+    cycle_start();
+    while (!(ace.w_valid && ace.w_ready)) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.w_data = ace.w_data;
+    beat.w_strb = ace.w_strb;
+    beat.w_last = ace.w_last;
+    beat.w_user = ace.w_user;
+    cycle_end();
+  endtask
+
+  /// Monitor the B channel and return the next beat.
+  task mon_b (
+    output b_beat_t beat
+  );
+    cycle_start();
+    while (!(ace.b_valid && ace.b_ready)) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.b_id = ace.b_id;
+    beat.b_resp = ace.b_resp;
+    beat.b_user = ace.b_user;
+    cycle_end();
+  endtask
+
+  /// Monitor the AR channel and return the next beat.
+  task mon_ar (
+    output ax_beat_t beat
+  );
+    cycle_start();
+    while (!(ace.ar_valid && ace.ar_ready)) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.ax_id = ace.ar_id;
+    beat.ax_addr = ace.ar_addr;
+    beat.ax_len = ace.ar_len;
+    beat.ax_size = ace.ar_size;
+    beat.ax_burst = ace.ar_burst;
+    beat.ax_lock = ace.ar_lock;
+    beat.ax_cache = ace.ar_cache;
+    beat.ax_prot = ace.ar_prot;
+    beat.ax_qos = ace.ar_qos;
+    beat.ax_region = ace.ar_region;
+    beat.ax_atop = 'X; // Not defined on the AR channel.
+    beat.ax_user = ace.ar_user;
+    beat.ax_snoop = ace.ar_snoop;
+    beat.ax_bar = ace.ar_bar;
+    beat.ax_domain = ace.ar_domain;
+    cycle_end();
+  endtask
+
+  /// Monitor the R channel and return the next beat.
+  task mon_r (
+    output r_beat_t beat
+  );
+    cycle_start();
+    while (!(ace.r_valid && ace.r_ready)) begin cycle_end(); cycle_start(); end
+    beat = new;
+    beat.r_id = ace.r_id;
+    beat.r_data = ace.r_data;
+    beat.r_resp = ace.r_resp;
+    beat.r_last = ace.r_last;
+    beat.r_user = ace.r_user;
+    cycle_end();
+  endtask
+
+endclass
+
+class ace_rand_master #(
+  // AXI interface parameters
+  parameter int AW = 32,
+  parameter int DW = 32,
+  parameter int IW = 8,
+  parameter int UW = 1,
+  // Snoop interface parameters
+  parameter int AC_AW = AW, // AC addr width
+  parameter int CD_DW = DW, // CD data width
+  // Stimuli application and test time
+  parameter time TA = 0ps,
+  parameter time TT = 0ps,
+  // Maximum number of read and write transactions in flight
+  parameter int MAX_READ_TXNS = 1,
+  parameter int MAX_WRITE_TXNS = 1,
+  // Upper and lower bounds on wait cycles on Ax, W, and resp (R and B) channels
+  parameter int AX_MIN_WAIT_CYCLES = 0,
+  parameter int AX_MAX_WAIT_CYCLES = 100,
+  parameter int W_MIN_WAIT_CYCLES = 0,
+  parameter int W_MAX_WAIT_CYCLES = 5,
+  parameter int RESP_MIN_WAIT_CYCLES = 0,
+  parameter int RESP_MAX_WAIT_CYCLES = 20,
+  // AXI feature usage
+  parameter int AXI_MAX_BURST_LEN = 0, // maximum number of beats in burst; 0 = AXI max (256)
+  parameter
int TRAFFIC_SHAPING = 0, + parameter bit AXI_EXCLS = 1'b0, + parameter bit AXI_ATOPS = 1'b0, + parameter bit AXI_BURST_FIXED = 1'b0, + parameter bit AXI_BURST_INCR = 1'b1, + parameter bit AXI_BURST_WRAP = 1'b1, + parameter bit UNIQUE_IDS = 1'b0, // guarantee that the ID of each transaction is + // unique among all in-flight transactions in the + // same direction + parameter int AC_MIN_WAIT_CYCLES = 0, + parameter int AC_MAX_WAIT_CYCLES = 100, + parameter int CR_MIN_WAIT_CYCLES = 0, + parameter int CR_MAX_WAIT_CYCLES = 5, + parameter int CD_MIN_WAIT_CYCLES = 0, + parameter int CD_MAX_WAIT_CYCLES = 20, + + parameter int MEM_ADDR_SPACE = 8, // Address space for internal memory + parameter int CACHELINE_WIDTH = 0, // How many bytes in a cache line + + // Dependent parameters, do not override. + parameter int CACHELINE_WORD_SIZE = DW/8, // How many bytes in one word + parameter int AXI_STRB_WIDTH = DW/8, + parameter int N_AXI_IDS = 2**IW +); + + typedef ace_driver #( + .AW(AW), .DW(DW), .IW(IW), .UW(UW), .TA(TA), .TT(TT) + ) ace_driver_t; + + typedef logic [AW-1:0] addr_t; + typedef logic [MEM_ADDR_SPACE-1:0] mem_addr_t; + typedef logic [DW-1:0] data_t; + typedef logic [CD_DW-1:0] cd_data_t; + typedef logic [IW-1:0] id_t; + typedef logic [7:0] byte_t; + + // Internal "cache" memory + byte_t memory_q[mem_addr_t]; + + // Bitmask to check whether cache line boundary is crossed + static addr_t CLINE_BOUNDARY_MASK = ~((1 << $clog2(CACHELINE_WIDTH * 8)) - 1); + + localparam int CLINE_WIDTH_PER_DW = CACHELINE_WIDTH / (DW / 8); + localparam int CLINE_WIDTH_PER_CD_DW = CACHELINE_WIDTH / (CD_DW / 8); + + // Driver + ace_driver_t ace_drv; + + semaphore cnt_sem; + + // List of allowed burst types + axi_pkg::burst_t allowed_bursts[$]; + + // Max value for AxSIZE + localparam unsigned max_size = $clog2(DW); + // AxLEN for full cache line transaction + localparam unsigned cline_len = CACHELINE_WIDTH / CACHELINE_WORD_SIZE; + + int unsigned r_flight_cnt[N_AXI_IDS-1:0], + 
w_flight_cnt[N_AXI_IDS-1:0], + tot_r_flight_cnt, + tot_w_flight_cnt; + + ace_driver_t::ax_beat_t aw_ace_queue[$], w_queue[$]; + ace_driver_t::ac_beat_t ac_cr_queue[$], ac_cd_queue[$]; + + typedef struct packed { + addr_t addr_begin; + addr_t addr_end; + axi_pkg::mem_type_t mem_type; + } mem_region_t; + + mem_region_t mem_map[$]; + + function new( + virtual ACE_BUS_DV #( + .AXI_ADDR_WIDTH (AW), + .AXI_DATA_WIDTH (DW), + .AXI_ID_WIDTH (IW), + .AXI_USER_WIDTH (UW) + ) ace, + virtual SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH (AW), + .SNOOP_DATA_WIDTH (DW) + ) snoop + ); + this.ace_drv = new(ace, snoop); + this.cnt_sem = new(1); + this.reset(); + if (AXI_BURST_FIXED) begin + this.allowed_bursts.push_back(axi_pkg::BURST_FIXED); + end + if (AXI_BURST_INCR) begin + this.allowed_bursts.push_back(axi_pkg::BURST_INCR); + end + if (AXI_BURST_WRAP) begin + this.allowed_bursts.push_back(axi_pkg::BURST_WRAP); + end + assert(allowed_bursts.size()) else $fatal(1, "At least one burst type has to be specified!"); + endfunction + + function void reset(); + ace_drv.reset_master(); + r_flight_cnt = '{default: 0}; + w_flight_cnt = '{default: 0}; + tot_r_flight_cnt = 0; + tot_w_flight_cnt = 0; + endfunction + + function void init_cache_memory(); + for (int addr = 0; addr < 2**MEM_ADDR_SPACE; addr++) begin + memory_q[addr] = $urandom(); + end + endfunction + + function void add_memory_region( + input addr_t addr_begin, + input addr_t addr_end, + input axi_pkg::mem_type_t mem_type + ); + mem_map.push_back({addr_begin, addr_end, mem_type}); + endfunction + + // Generate random AxSize that + // maps between allowed values + function axi_pkg::size_t gen_rand_size(); + automatic logic rand_success; + axi_pkg::size_t size; + rand_success = std::randomize(size) with { + size >= 0; + size <= max_size; + }; assert(rand_success); + return size; + endfunction + + // Generate random AxLen that + // maps between allowed values + // AxLEN cannot be wider than cache line width + function axi_pkg::len_t 
gen_rand_len( + input axi_pkg::size_t size, + input logic snoop_trs, + input axi_pkg::burst_t burst + ); + automatic logic rand_success; + axi_pkg::len_t len; + if (snoop_trs) begin + rand_success = std::randomize(len) with { + len inside {1, 2, 4, 8, 16}; + len <= cline_len; + }; assert(rand_success); + if ((burst == axi_pkg::BURST_WRAP) && (len == 1)) begin + // AxLEN 1 not allowed for wrap bursts + len = 2; + end + end else begin + if (burst == axi_pkg::BURST_WRAP) begin + rand_success = std::randomize(len) with { + len inside {2, 4, 8, 16}; + }; assert(rand_success); + end else begin + len = $urandom_range(1, 256); + end + end + return len; + endfunction + + function axi_pkg::burst_t get_rand_burst(); + automatic logic rand_success; + axi_pkg::burst_t burst; + rand_success = std::randomize(burst) with { + burst inside {this.allowed_bursts}; + }; assert(rand_success); + return burst; + endfunction + + function ace_driver_t::ax_beat_t new_rand_burst(input logic is_read); + + automatic ace_driver_t::ax_beat_t ax_ace_beat = new; + automatic axi_pkg::cache_t cache; + automatic axi_pkg::burst_t burst; + automatic id_t id; + automatic axi_pkg::qos_t qos; + automatic addr_t addr; + automatic axi_pkg::len_t len; + automatic axi_pkg::size_t size; + automatic ace_pkg::axbar_t bar; + automatic ace_pkg::axdomain_t domain; + automatic ace_pkg::arsnoop_t snoop; + automatic ace_pkg::awunique_t awunique; + automatic mem_region_t mem_region; + automatic ar_snoop_e ar_trs; + automatic aw_snoop_e aw_trs; + + logic snoop_trs, accepts_dirty, accepts_shared, accepts_dirty_shared; + + cache = axi_pkg::get_arcache(axi_pkg::DEVICE_BUFFERABLE); + burst = get_rand_burst(); + id = $urandom(); + qos = $urandom(); + + + // Most of ACE transactions are restricted to have + // a size of the data bus width + size = max_size; + + awunique = 1'b0; + snoop_trs = 1'b1; + + // Accepted RRESP responses + accepts_dirty = 1'b0; + accepts_shared = 1'b0; + accepts_dirty_shared = 1'b0; + + if (is_read) 
begin + // Read operation + std::randomize(ar_trs) with + { !(ar_trs inside {ar_unsupported_ops}); }; + case( ar_trs ) + AR_READ_NO_SNOOP: begin + snoop = ace_pkg::ReadNoSnoop; + domain = 'b00; + bar = 'b00; + snoop_trs = 1'b0; + size = gen_rand_size(); + len = gen_rand_len(size, snoop_trs, burst); + end + AR_READ_ONCE: begin + snoop = ace_pkg::ReadOnce; + domain = 'b01; + bar = 'b00; + size = gen_rand_size(); + len = gen_rand_len(size, snoop_trs, burst); + accepts_shared = 1'b1; + end + AR_READ_SHARED: begin + snoop = ace_pkg::ReadShared; + domain = 'b01; + bar = 'b00; + len = cline_len; + accepts_dirty = 1'b1; + accepts_dirty_shared = 1'b1; + accepts_shared = 1'b1; + end + AR_READ_CLEAN: begin + snoop = ace_pkg::ReadClean; + domain = 'b01; + bar = 'b00; + len = cline_len; + accepts_shared = 1'b1; + end + AR_READ_NOT_SHARED_DIRTY: begin + snoop = ace_pkg::ReadNotSharedDirty; + domain = 'b01; + bar = 'b00; + len = cline_len; + accepts_dirty = 1'b1; + accepts_shared = 1'b1; + end + AR_READ_UNIQUE: begin + snoop = ace_pkg::ReadUnique; + domain = 'b01; + bar = 'b00; + len = cline_len; + accepts_dirty = 1'b1; + end + AR_CLEAN_UNIQUE: begin + snoop = ace_pkg::CleanUnique; + domain = 'b01; + bar = 'b00; + len = cline_len; + end + AR_MAKE_UNIQUE: begin + snoop = ace_pkg::CleanUnique; + domain = 'b01; + bar = 'b00; + len = cline_len; + end + AR_CLEAN_SHARED: begin + snoop = ace_pkg::CleanShared; + domain = 'b01; + bar = 'b00; + len = cline_len; + accepts_shared = 1'b1; + end + AR_CLEAN_INVALID: begin + snoop = ace_pkg::CleanInvalid; + domain = 'b01; + bar = 'b00; + len = cline_len; + end + AR_MAKE_INVALID: begin + snoop = ace_pkg::MakeInvalid; + domain = 'b01; + bar = 'b00; + len = cline_len; + end + AR_BARRIER: begin + snoop = ace_pkg::Barrier; + domain = 'b01; + bar = 'b01; + len = cline_len; + end + AR_DVM_COMPLETE: begin + snoop = ace_pkg::DVMComplete; + domain = 'b01; + bar = 'b00; + len = cline_len; + end + AR_DVM_MESSAGE: begin + snoop = ace_pkg::DVMMessage; + 
domain = 'b01; + bar = 'b00; + len = cline_len; + end + default: begin + $error("Invalid snoop op enumeration."); + snoop = 'b0000; + domain = 'b00; + bar = 'b00; + len = $urandom(); + end + endcase + end else begin + // Write operation + std::randomize(aw_trs) with + { !(aw_trs inside {aw_unsupported_ops}); }; + case( aw_trs ) + AW_WRITE_NO_SNOOP: begin + snoop = ace_pkg::WriteNoSnoop; + domain = 'b00; + bar = 'b00; + snoop_trs = 1'b0; + size = gen_rand_size(); + len = $urandom(); + end + AW_WRITE_UNIQUE: begin + snoop = ace_pkg::WriteUnique; + domain = 'b01; + bar = 'b00; + size = gen_rand_size(); + len = gen_rand_len(size, snoop_trs, burst); + end + AW_WRITE_LINE_UNIQUE: begin + snoop = ace_pkg::WriteLineUnique; + domain = 'b01; + bar = 'b00; + len = cline_len; + end + AW_WRITE_CLEAN: begin + snoop = ace_pkg::WriteClean; + domain = 'b01; + bar = 'b00; + len = cline_len; + end + AW_WRITE_BACK: begin + snoop = ace_pkg::WriteBack; + domain = 'b01; + bar = 'b00; + len = cline_len; + end + AW_EVICT: begin + snoop = ace_pkg::Evict; + domain = 'b01; + bar = 'b00; + len = cline_len; + end + AW_WRITE_EVICT: begin + snoop = ace_pkg::WriteEvict; + domain = 'b01; + bar = 'b00; + len = cline_len; + end + AR_MAKE_UNIQUE: begin + snoop = ace_pkg::CleanUnique; + domain = 'b01; + bar = 'b00; + len = cline_len; + end + AW_BARRIER: begin + snoop = ace_pkg::Barrier; + domain = 'b01; + bar = 'b01; + len = cline_len; + end + default: begin + $error("Invalid snoop op enumeration."); + snoop = 'b0000; + domain = 'b00; + bar = 'b00; + len = $urandom(); + end + endcase + end + + mem_region = '{ + addr_begin: '0, + addr_end: '1, + mem_type: axi_pkg::NORMAL_NONCACHEABLE_BUFFERABLE + }; + + forever begin + // Randomize address + addr = $urandom_range(mem_region.addr_begin, mem_region.addr_end); + addr[AXI_STRB_WIDTH:0] = '0; // align address to word boundary + if (snoop_trs) begin + if (burst == axi_pkg::BURST_FIXED) begin + $error("FIXED type burst not allowed!"); + end else if (burst == 
axi_pkg::BURST_INCR) begin + // Assert that transaction does not cross cache line boundary + if (((addr + ((2**size * len)-1)) & CLINE_BOUNDARY_MASK) == (addr & CLINE_BOUNDARY_MASK)) begin + break; + end + end else begin + // WRAP bursts should be fine in all situations + break; + end + end else begin + break; + end + end + + ax_ace_beat.ax_addr = addr; + ax_ace_beat.ax_burst = burst; + ax_ace_beat.ax_size = size; + ax_ace_beat.ax_len = len - 1; + ax_ace_beat.ax_id = id; + ax_ace_beat.ax_qos = qos; + ax_ace_beat.ax_snoop = snoop; + ax_ace_beat.ax_bar = bar; + ax_ace_beat.ax_domain = domain; + ax_ace_beat.ax_awunique = awunique; + + return ax_ace_beat; + + endfunction + + // TODO: The `rand_wait` task exists in `rand_verif_pkg`, but that task cannot be called with + // `this.drv.ace.clk_i` as `clk` argument. What is the syntax getting an assignable reference? + task automatic rand_wait(input int unsigned min, max); + int unsigned rand_success, cycles; + cycles = $urandom_range(min,max); + // rand_success = std::randomize(cycles) with { + // cycles >= min; + // cycles <= max; + // }; + // assert (rand_success) else $error("Failed to randomize wait cycles!"); + repeat (cycles) @(posedge this.ace_drv.ace.clk_i); + endtask + + task send_ars(input int n_reads); + automatic logic rand_success; + repeat (n_reads) begin + automatic id_t id; + automatic ace_driver_t::ax_beat_t ar_ace_beat = new_rand_burst(1'b1); + while (tot_r_flight_cnt >= MAX_READ_TXNS) begin + rand_wait(1, 1); + end + tot_r_flight_cnt++; + rand_wait(AX_MIN_WAIT_CYCLES, AX_MAX_WAIT_CYCLES); + ace_drv.send_ar(ar_ace_beat); + end + $info("Finish ARs"); + endtask + + task recv_rs(ref logic ar_done); + while (!(ar_done && tot_r_flight_cnt == 0)) begin + automatic ace_driver_t::r_beat_t r_ace_beat; + rand_wait(RESP_MIN_WAIT_CYCLES, RESP_MAX_WAIT_CYCLES); + if (tot_r_flight_cnt > 0) begin + ace_drv.recv_r(r_ace_beat); + if (r_ace_beat.r_last) begin + cnt_sem.get(); + r_flight_cnt[r_ace_beat.r_id]--; + 
tot_r_flight_cnt--; + cnt_sem.put(); + end + end + end + $info("Finish Rs"); + endtask + + task create_aws(input int n_writes); + automatic logic rand_success; + repeat (n_writes) begin + automatic bit excl = 1'b0; + automatic ace_driver_t::ax_beat_t aw_ace_beat; + aw_ace_beat = new_rand_burst(1'b0); + while (tot_w_flight_cnt >= MAX_WRITE_TXNS) begin + rand_wait(1, 1); + end + tot_w_flight_cnt++; + aw_ace_queue.push_back(aw_ace_beat); + w_queue.push_back(aw_ace_beat); + end + $info("Finish AW creates"); + endtask + + task send_aws(ref logic aw_done); + while (!(aw_done && aw_ace_queue.size() == 0)) begin + automatic ace_driver_t::ax_beat_t aw_ace_beat; + wait (aw_ace_queue.size() > 0 || (aw_done && aw_ace_queue.size() == 0)); + aw_ace_beat = aw_ace_queue.pop_front(); + rand_wait(AX_MIN_WAIT_CYCLES, AX_MAX_WAIT_CYCLES); + ace_drv.send_aw(aw_ace_beat); + end + $info("Finish AW sends"); + endtask + + task send_ws(ref logic aw_done); + while (!(aw_done && w_queue.size() == 0)) begin + automatic ace_driver_t::ax_beat_t aw_ace_beat; + automatic addr_t addr; + static logic rand_success; + wait (w_queue.size() > 0 || (aw_done && w_queue.size() == 0)); + aw_ace_beat = w_queue.pop_front(); + for (int unsigned i = 0; i < aw_ace_beat.ax_len + 1; i++) begin + automatic ace_driver_t::w_beat_t w_beat = new; + automatic int unsigned begin_byte, end_byte, n_bytes; + automatic logic [AXI_STRB_WIDTH-1:0] rand_strb, strb_mask; + addr = axi_pkg::beat_addr(aw_ace_beat.ax_addr, aw_ace_beat.ax_size, aw_ace_beat.ax_len, + aw_ace_beat.ax_burst, i); + //rand_success = w_beat.randomize(); assert (rand_success); + // Determine strobe. 
+ w_beat.w_strb = '0; + n_bytes = 2**aw_ace_beat.ax_size; + begin_byte = addr % AXI_STRB_WIDTH; + end_byte = ((begin_byte + n_bytes) >> aw_ace_beat.ax_size) << aw_ace_beat.ax_size; + strb_mask = '0; + for (int unsigned b = begin_byte; b < end_byte; b++) + strb_mask[b] = 1'b1; + rand_strb = $urandom(); + //rand_success = std::randomize(rand_strb); assert (rand_success); + w_beat.w_strb |= (rand_strb & strb_mask); + // Determine last. + w_beat.w_last = (i == aw_ace_beat.ax_len); + rand_wait(W_MIN_WAIT_CYCLES, W_MAX_WAIT_CYCLES); + ace_drv.send_w(w_beat); + end + end + $info("Finish Ws"); + endtask + + task recv_bs(ref logic aw_done); + while (!(aw_done && tot_w_flight_cnt == 0)) begin + automatic ace_driver_t::b_beat_t b_beat; + rand_wait(RESP_MIN_WAIT_CYCLES, RESP_MAX_WAIT_CYCLES); + ace_drv.recv_b(b_beat); + cnt_sem.get(); + w_flight_cnt[b_beat.b_id]--; + tot_w_flight_cnt--; + cnt_sem.put(); + end + $info("Finish Bs"); + endtask + + task recv_acs(ref logic sim_done); + while (!sim_done) begin + automatic ace_driver_t::ac_beat_t ace_ac_beat; + rand_wait(AC_MIN_WAIT_CYCLES, AC_MAX_WAIT_CYCLES); + ace_drv.recv_ac(ace_ac_beat, sim_done); + if (!sim_done) begin + // Determine randomly already here whether this AC causes datatransfer + // Ideally, this would be replaced by looking up the internal cache memory + ace_ac_beat.data_transfer = $urandom_range(0,1); + ac_cr_queue.push_back(ace_ac_beat); + ac_cd_queue.push_back(ace_ac_beat); + end + end + $info("Finish ACs"); + endtask + + task send_crs(ref logic sim_done); + while (!sim_done) begin + automatic logic rand_success; + automatic ace_driver_t::ac_beat_t ace_ac_beat; + automatic ace_driver_t::cr_beat_t ace_cr_beat = new; + wait ((ac_cr_queue.size() > 0) || sim_done); + if (ac_cr_queue.size() > 0) begin + ace_ac_beat = ac_cr_queue.pop_front(); + ace_cr_beat.cr_resp[4:2] = $urandom_range(0,3'b111);//$urandom_range(0,5'b11111); + ace_cr_beat.cr_resp[1] = 1'b0; + ace_cr_beat.cr_resp[0] = ace_ac_beat.data_transfer; + 
rand_wait(CR_MIN_WAIT_CYCLES, CR_MAX_WAIT_CYCLES); + ace_drv.send_cr(ace_cr_beat); + end + end + $info("CR done"); + endtask + + task send_cds(ref logic sim_done); + while (!sim_done) begin + automatic logic rand_success; + automatic ace_driver_t::ac_beat_t ace_ac_beat; + automatic ace_driver_t::cd_beat_t ace_cd_beat = new; + automatic addr_t byte_addr; + automatic mem_addr_t mem_addr; + automatic cd_data_t cd_word; + wait ((ac_cd_queue.size() > 0) || sim_done); + if (ac_cd_queue.size() > 0) begin + ace_ac_beat = ac_cd_queue.pop_front(); + // If data transfer, send CD data. Otherwise, ignore. + if (ace_ac_beat.data_transfer) begin + mem_addr = ace_ac_beat.ac_addr[MEM_ADDR_SPACE-1:0]; + for (int i = 0; i < CLINE_WIDTH_PER_CD_DW; i++) begin + for (int j = 0; j < (CD_DW / 8); j++) begin + // Compose CD word that is CD_DW bits wide + cd_word[j*(CD_DW/8) +: 8] = memory_q[mem_addr+(i*(CD_DW/8)+j)]; + end + // random response + ace_cd_beat.cd_data = cd_word; + if (i == (CLINE_WIDTH_PER_CD_DW - 1)) begin + ace_cd_beat.cd_last = 1'b1; + end else begin + ace_cd_beat.cd_last = 1'b0; + end + rand_wait(CD_MIN_WAIT_CYCLES, CD_MAX_WAIT_CYCLES); + ace_drv.send_cd(ace_cd_beat); + end + end + end + end + $info("CD done"); + endtask + + task sim_done_task(ref logic first, ref logic second); + forever begin + if (first && second) begin + break; + end + #TT; + end + endtask + + // Issue n_reads random read and n_writes random + // write transactions to an address range. 
+ task run(input int n_reads, input int n_writes); + automatic logic ar_done = 1'b0, + aw_done = 1'b0, + b_done = 1'b0, + r_done = 1'b0, + sim_done = 1'b0; + fork + begin + send_ars(n_reads); + ar_done = 1'b1; + end + begin + recv_rs(ar_done); + r_done = 1'b1; + end + begin + create_aws(n_writes); + aw_done = 1'b1; + end + send_aws(aw_done); + send_ws(aw_done); + begin + recv_bs(aw_done); + b_done = 1'b1; + end + begin + sim_done_task(r_done, b_done); + sim_done = 1'b1; + end + recv_acs(sim_done); + send_crs(sim_done); + send_cds(sim_done); + join + endtask + +endclass + +// Datatype for storing the data that was transferred in +// an AXI transaction +class axi_transaction #( + /// AXI4+ATOP address width + parameter int unsigned AW = 0, + /// AXI4+ATOP data width + parameter int unsigned DW = 0, +); + rand bit [AW-1:0] address; + rand bit [DW-1:0] data; + rand bit write_en; + + function new(); + address = 0; + data = 0; + write_en = 0; + endfunction + +endclass + + +class ace_monitor #( + /// AXI4+ATOP ID width + parameter int unsigned IW = 0, + /// AXI4+ATOP address width + parameter int unsigned AW = 0, + /// AXI4+ATOP data width + parameter int unsigned DW = 0, + /// AXI4+ATOP user width + parameter int unsigned UW = 0, + /// Stimuli test time + parameter time TT = 0ns +); + + typedef axi_transaction #( + .AW(AW), .DW(DW) + ) axi_txn_t; + + typedef ace_driver #( + .AW(AW), .DW(DW), .IW(IW), .UW(UW), .TA(TT), .TT(TT) + ) ace_driver_t; + + ace_driver_t ace_drv; + axi_txn_t axi_txn; + event new_axi_txn_event; + + ace_driver_t::ax_beat_t new_ax_transaction; + + mailbox aw_mbx = new, w_mbx = new, b_mbx = new, + ar_mbx = new, r_mbx = new; + + function new( + virtual ACE_BUS_DV #( + .AXI_ADDR_WIDTH(AW), + .AXI_DATA_WIDTH(DW), + .AXI_ID_WIDTH(IW), + .AXI_USER_WIDTH(UW) + ) ace, + virtual SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH (AW), + .SNOOP_DATA_WIDTH (DW) + ) snoop + ); + this.ace_drv = new(ace, snoop); + this.new_axi_txn_event = new(); + endfunction + + task monitor; + 
fork + // AW + forever begin + automatic ace_driver_t::ax_beat_t ax; + this.ace_drv.mon_aw(ax); + aw_mbx.put(ax); + end + // W + forever begin + automatic w_beat_t w; + this.drv.mon_w(w); + w_mbx.put(w); + end + // B + forever begin + automatic b_beat_t b; + this.drv.mon_b(b); + b_mbx.put(b); + end + // AR + forever begin + automatic ax_beat_t ax; + this.drv.mon_ar(ax); + ar_mbx.put(ax); + end + // R + forever begin + automatic r_beat_t r; + this.drv.mon_r(r); + r_mbx.put(r); + -> txn_event; + end + join + endtask + +endclass + + +class ace_scoreboard #( + /// AXI4+ATOP ID width + parameter int unsigned IW = 0, + /// AXI4+ATOP address width + parameter int unsigned AW = 0, + /// AXI4+ATOP data width + parameter int unsigned DW = 0, + /// AXI4+ATOP user width + parameter int unsigned UW = 0, + /// Stimuli test time + parameter time TT = 0ns +); + + typedef axi_transaction #( + .AW(AW), .DW(DW) + ) axi_txn_t; + + ref event new_axi_txn_event; + ref axi_txn_t axi_txn; + + // Monitor interface + virtual ACE_BUS_DV #( + .AXI_ADDR_WIDTH ( AW ), + .AXI_DATA_WIDTH ( DW ), + .AXI_ID_WIDTH ( IW ), + .AXI_USER_WIDTH ( UW ) + ) ace; + + /// New constructor + function new( + ref event e, + ref axi_txn_t t + ); + this.new_axi_txn_event = e; + this.axi_txn = t; + endfunction + +endpackage \ No newline at end of file diff --git a/src/ace_test.sv b/src/deprecated/ace_test.sv similarity index 90% rename from src/ace_test.sv rename to src/deprecated/ace_test.sv index 6c8d667..125f626 100644 --- a/src/ace_test.sv +++ b/src/deprecated/ace_test.sv @@ -18,6 +18,38 @@ package ace_test; import axi_pkg::*; import ace_pkg::*; + typedef enum logic [3:0] { + AR_READ_NO_SNOOP, + AR_READ_ONCE, + AR_READ_SHARED, + AR_READ_CLEAN, + AR_READ_NOT_SHARED_DIRTY, + AR_READ_UNIQUE, + AR_CLEAN_UNIQUE, + AR_MAKE_UNIQUE, + AR_CLEAN_SHARED, + AR_CLEAN_INVALID, + AR_MAKE_INVALID, + AR_BARRIER, + AR_DVM_COMPLETE, + AR_DVM_MESSAGE + } ar_snoop_e; + + ar_snoop_e ar_unsupported_ops[] = '{AR_READ_NO_SNOOP, 
AR_BARRIER, AR_DVM_COMPLETE, AR_DVM_MESSAGE}; + + typedef enum logic [2:0] { + AW_WRITE_NO_SNOOP, + AW_WRITE_UNIQUE, + AW_WRITE_LINE_UNIQUE, + AW_WRITE_CLEAN, + AW_WRITE_BACK, + AW_EVICT, + AW_WRITE_EVICT, + AW_BARRIER + } aw_snoop_e; + + aw_snoop_e aw_unsupported_ops[] = '{AW_BARRIER}; + /// The data transferred on a beat on the AW/AR channels. class ace_ax_beat #( parameter AW = 32, @@ -431,7 +463,7 @@ endclass beat.r_user = ace.r_user; cycle_end(); ace.r_ready <= #TA 0; - ace.rack <= #TA 1; + ace.rack <= #TA ace.r_last; cycle_start(); ace.rack <= #TA 0; endtask @@ -576,8 +608,8 @@ endclass typedef axi_pkg::len_t len_t; typedef axi_pkg::size_t size_t; typedef ace_pkg::arsnoop_t snoop_t; // use only arsnoop_t, which is bigger than awsnoop_t - typedef ace_pkg::bar_t bar_t; - typedef ace_pkg::domain_t domain_t; + typedef ace_pkg::axbar_t bar_t; + typedef ace_pkg::axdomain_t domain_t; typedef ace_pkg::awunique_t awunique_t; @@ -691,8 +723,8 @@ endclass automatic int unsigned mem_region_idx; automatic mem_region_t mem_region; automatic int cprob; - automatic logic [2:0] trs; - + ar_snoop_e ar_trs; + aw_snoop_e aw_trs; // No memory regions defined if (mem_map.size() == 0) begin // Return a dummy region @@ -712,9 +744,9 @@ endclass // Randomly pick burst type. burst = BURST_FIXED; - // rand_success = std::randomize(burst) with { - // burst inside {this.allowed_bursts}; - // }; assert(rand_success); + rand_success = std::randomize(burst) with { + burst inside {this.allowed_bursts}; + }; assert(rand_success); ax_ace_beat.ax_burst = burst; // Determine memory type. ax_ace_beat.ax_cache = is_read ? axi_pkg::get_arcache(mem_region.mem_type) : axi_pkg::get_awcache(mem_region.mem_type); @@ -736,7 +768,7 @@ endclass // Randomize address. Make sure that the burst does not cross a 4KiB boundary. 
forever begin - size = $clog2(AXI_STRB_WIDTH)-1; + size = $clog2(AXI_STRB_WIDTH); // rand_success = std::randomize(size) with { // 2**size <= AXI_STRB_WIDTH; // 2**size <= len; @@ -782,68 +814,171 @@ endclass id = $urandom(); qos = $urandom(); awunique = 0; - trs = $urandom_range(0,7); - size = $clog2(AXI_STRB_WIDTH)-1; - case(trs ) - ace_pkg::READ_NO_SNOOP: begin - snoop = 'b0000; - domain = 'b00; - bar = 'b00; - len = $urandom(); - end - ace_pkg::READ_ONCE: begin - snoop = 'b0000; - domain = 'b01; - bar = 'b00; - len = 1; - end - ace_pkg::READ_SHARED: begin - snoop = 'b0001; - domain = 'b01; - bar = 'b00; - len = 1; - end - ace_pkg::READ_UNIQUE: begin - snoop = 'b0111; - domain = 'b01; - bar = 'b00; - len = 1; - end - - ace_pkg::CLEAN_UNIQUE: begin - snoop = 'b1011; - domain = 'b01; - bar = 'b00; - len = 0; - end - - ace_pkg::WRITE_NO_SNOOP: begin - snoop = 'b0000; - domain = 'b00; - bar = 'b00; - len = $urandom(); - end - ace_pkg::WRITE_BACK: begin - snoop = 'b0011; - domain = 'b00; - bar = 'b00; - len = 1; - end - ace_pkg::WRITE_UNIQUE: begin - snoop = 'b0000; - domain = 'b10; - bar = 'b00; - len = 1; - end - + size = $clog2(AXI_STRB_WIDTH); + if (is_read) begin + // Read operation + std::randomize(ar_trs) with { !(ar_trs inside {ar_unsupported_ops}); }; + case( ar_trs ) + AR_READ_NO_SNOOP: begin + snoop = ace_pkg::ReadNoSnoop; + domain = 'b00; + bar = 'b00; + len = $urandom(); + end + AR_READ_ONCE: begin + snoop = ace_pkg::ReadOnce; + domain = 'b01; + bar = 'b00; + len = $urandom_range(0,1); + end + AR_READ_SHARED: begin + snoop = ace_pkg::ReadShared; + domain = 'b01; + bar = 'b00; + len = 1; + end + AR_READ_CLEAN: begin + snoop = ace_pkg::ReadClean; + domain = 'b01; + bar = 'b00; + len = 1; + end + AR_READ_NOT_SHARED_DIRTY: begin + snoop = ace_pkg::ReadNotSharedDirty; + domain = 'b01; + bar = 'b00; + len = 1; + end + AR_READ_UNIQUE: begin + snoop = ace_pkg::ReadUnique; + domain = 'b01; + bar = 'b00; + len = 1; + end + AR_CLEAN_UNIQUE: begin + snoop = 
ace_pkg::CleanUnique; + domain = 'b01; + bar = 'b00; + len = 1; + end + AR_MAKE_UNIQUE: begin + snoop = ace_pkg::CleanUnique; + domain = 'b01; + bar = 'b00; + len = 1; + end + AR_CLEAN_SHARED: begin + snoop = ace_pkg::CleanShared; + domain = 'b01; + bar = 'b00; + len = 1; + end + AR_CLEAN_INVALID: begin + snoop = ace_pkg::CleanInvalid; + domain = 'b01; + bar = 'b00; + len = 1; + end + AR_MAKE_INVALID: begin + snoop = ace_pkg::MakeInvalid; + domain = 'b01; + bar = 'b00; + len = 1; + end + AR_BARRIER: begin + snoop = ace_pkg::Barrier; + domain = 'b01; + bar = 'b01; + len = 1; + end + AR_DVM_COMPLETE: begin + snoop = ace_pkg::DVMComplete; + domain = 'b01; + bar = 'b00; + len = 1; + end + AR_DVM_MESSAGE: begin + snoop = ace_pkg::DVMMessage; + domain = 'b01; + bar = 'b00; + len = 1; + end + default: begin + $error("Invalid snoop op enumeration."); + snoop = 'b0000; + domain = 'b00; + bar = 'b00; + len = $urandom(); + end + endcase + end else begin + // Write operation + std::randomize(aw_trs) with { !(aw_trs inside {aw_unsupported_ops}); }; + case( aw_trs ) + AW_WRITE_NO_SNOOP: begin + snoop = ace_pkg::WriteNoSnoop; + domain = 'b00; + bar = 'b00; + len = $urandom(); + end + AW_WRITE_UNIQUE: begin + snoop = ace_pkg::WriteUnique; + domain = 'b01; + bar = 'b00; + len = 1; + end + AW_WRITE_LINE_UNIQUE: begin + snoop = ace_pkg::WriteLineUnique; + domain = 'b01; + bar = 'b00; + len = 1; + end + AW_WRITE_CLEAN: begin + snoop = ace_pkg::WriteClean; + domain = 'b01; + bar = 'b00; + len = 1; + end + AW_WRITE_BACK: begin + snoop = ace_pkg::WriteBack; + domain = 'b01; + bar = 'b00; + len = 1; + end + AW_EVICT: begin + snoop = ace_pkg::Evict; + domain = 'b01; + bar = 'b00; + len = 1; + end + AW_WRITE_EVICT: begin + snoop = ace_pkg::WriteEvict; + domain = 'b01; + bar = 'b00; + len = 1; + end + AR_MAKE_UNIQUE: begin + snoop = ace_pkg::CleanUnique; + domain = 'b01; + bar = 'b00; + len = 1; + end + AW_BARRIER: begin + snoop = ace_pkg::Barrier; + domain = 'b01; + bar = 'b01; + len = 1; + 
end + default: begin + $error("Invalid snoop op enumeration."); + snoop = 'b0000; + domain = 'b00; + bar = 'b00; + len = $urandom(); + end + endcase + end - default: begin - snoop = 'b0000; - domain = 'b00; - bar = 'b00; - len = $urandom(); - end - endcase ax_ace_beat.ax_addr = addr; ax_ace_beat.ax_size = size; @@ -936,7 +1071,7 @@ endclass automatic int unsigned n_bytes; automatic size_t size; automatic addr_t addr_mask; - ar_ace_beat.ax_size = $clog2(AXI_STRB_WIDTH)-1; + ar_ace_beat.ax_size = $clog2(AXI_STRB_WIDTH); // The address must be aligned to the total number of bytes in the burst. ar_ace_beat.ax_addr = ar_ace_beat.ax_addr & ~(2); @@ -1048,6 +1183,7 @@ endclass drv.send_ar(ar_ace_beat); if (ar_ace_beat.ax_lock) excl_queue.push_back(ar_ace_beat); end + $info("Finish ARs"); endtask task recv_rs(ref logic ar_done, aw_done); @@ -1070,6 +1206,7 @@ endclass end end end + $info("Finish Rs"); endtask task create_aws(input int n_writes); @@ -1091,6 +1228,7 @@ endclass aw_ace_queue.push_back(aw_ace_beat); w_queue.push_back(aw_ace_beat); end + $info("Finish AWs"); endtask task send_aws(ref logic aw_done); @@ -1134,6 +1272,7 @@ endclass drv.send_w(w_beat); end end + $info("Finish Ws"); endtask task recv_bs(ref logic aw_done); @@ -1149,6 +1288,7 @@ endclass tot_w_flight_cnt--; cnt_sem.put(); end + $info("Finish Bs"); endtask // Issue n_reads random read and n_writes random write transactions to an address range. 
diff --git a/src/snoop_test.sv b/src/deprecated/snoop_test.sv similarity index 91% rename from src/snoop_test.sv rename to src/deprecated/snoop_test.sv index 270460c..f59cd7d 100644 --- a/src/snoop_test.sv +++ b/src/deprecated/snoop_test.sv @@ -18,6 +18,19 @@ package snoop_test; import axi_pkg::*; import ace_pkg::*; + typedef enum logic [3:0] { + AC_READ_ONCE = 0, + AC_READ_SHARED = 1, + AC_READ_CLEAN = 2, + AC_READ_NOT_SHARED_DIRTY = 3, + AC_READ_UNIQUE = 4, + AC_CLEAN_SHARED = 5, + AC_CLEAN_INVALID = 6, + AC_MAKE_INVALID = 7, + AC_DVM_COMPLETE = 8, + AC_DVM_MESSAGE = 9 + } ac_snoop_e; + /// The data transferred on a beat on the AC channel. class ace_ac_beat #( parameter AW = 32 @@ -29,7 +42,7 @@ package snoop_test; /// The data transferred on a beat on the CR channel. class ace_cr_beat; - snoop_pkg::crresp_t cr_resp = '0; + ace_pkg::crresp_t cr_resp = '0; endclass /// The data transferred on a beat on the CD channel. @@ -238,9 +251,9 @@ package snoop_test; ) snoop_driver_t; typedef logic [AW-1:0] addr_t; typedef logic [DW-1:0] data_t; - typedef snoop_pkg::acsnoop_t acsnoop_t; - typedef snoop_pkg::acprot_t acprot_t; - typedef snoop_pkg::crresp_t crresp_t; + typedef ace_pkg::acsnoop_t acsnoop_t; + typedef ace_pkg::acprot_t acprot_t; + typedef ace_pkg::crresp_t crresp_t; typedef snoop_driver_t::ace_ac_beat_t ace_ac_beat_t; typedef snoop_driver_t::ace_cr_beat_t ace_cr_beat_t; @@ -277,10 +290,11 @@ package snoop_test; automatic logic rand_success; automatic ace_ac_beat_t ace_ac_beat = new; automatic addr_t addr; - automatic snoop_pkg::acsnoop_t snoop; - automatic snoop_pkg::acprot_t prot; + automatic ace_pkg::acsnoop_t snoop; + automatic ace_pkg::acprot_t prot; automatic int unsigned mem_region_idx; automatic mem_region_t mem_region; + automatic ac_snoop_e trs; // No memory regions defined if (mem_map.size() == 0) begin @@ -303,7 +317,23 @@ package snoop_test; addr = mem_region.addr_begin + $urandom_range(mem_region.addr_end-mem_region.addr_begin+1); 
ace_ac_beat.ac_addr = addr; - snoop = $urandom(); + + std::randomize(trs) with + {!(trs inside {AC_DVM_MESSAGE, AC_DVM_COMPLETE});}; // DVM not supported for the moment + + case (trs) + AC_READ_ONCE : snoop = ace_pkg::ReadOnce; + AC_READ_SHARED : snoop = ace_pkg::ReadShared; + AC_READ_CLEAN : snoop = ace_pkg::ReadClean; + AC_READ_NOT_SHARED_DIRTY: snoop = ace_pkg::ReadNotSharedDirty; + AC_READ_UNIQUE : snoop = ace_pkg::ReadUnique; + AC_CLEAN_SHARED : snoop = ace_pkg::CleanShared; + AC_CLEAN_INVALID : snoop = ace_pkg::CleanInvalid; + AC_MAKE_INVALID : snoop = ace_pkg::MakeInvalid; + AC_DVM_COMPLETE : snoop = ace_pkg::DVMComplete; + AC_DVM_MESSAGE : snoop = ace_pkg::DVMMessage; + endcase + prot = $urandom(); // rand_success = std::randomize(id); assert(rand_success); @@ -345,7 +375,7 @@ package snoop_test; automatic ace_cd_beat_t ace_cd_beat; rand_wait(CR_MIN_WAIT_CYCLES, CR_MAX_WAIT_CYCLES); drv.recv_cr(ace_cr_beat); - if (!ace_cr_beat.cr_resp.error & ace_cr_beat.cr_resp.dataTransfer) + if (!ace_cr_beat.cr_resp.Error & ace_cr_beat.cr_resp.DataTransfer) drv.recv_cd(ace_cd_beat); end endtask @@ -445,18 +475,18 @@ package snoop_test; automatic ace_cr_beat_t ace_cr_beat = new; wait (ace_ac_queue.size() > 0); ace_ac_beat = ace_ac_queue.pop_front(); - if(ace_ac_beat.ac_snoop == snoop_pkg::CLEAN_INVALID) begin + if(ace_ac_beat.ac_snoop == ace_pkg::CleanInvalid) begin ace_cr_beat.cr_resp = 0; end else begin ace_cr_beat.cr_resp[4:2] = $urandom_range(0,3'b111);//$urandom_range(0,5'b11111); - ace_cr_beat.cr_resp[1] = 'b0; + ace_cr_beat.cr_resp[1] = 1'b0; ace_cr_beat.cr_resp[0] = $urandom_range(0,1); end rand_wait(CR_MIN_WAIT_CYCLES, CR_MAX_WAIT_CYCLES); - drv.send_cr(ace_cr_beat); - if (ace_cr_beat.cr_resp.dataTransfer && !ace_cr_beat.cr_resp.error) begin + if (ace_cr_beat.cr_resp.DataTransfer) begin cd_wait_cnt++; end + drv.send_cr(ace_cr_beat); end endtask @@ -657,7 +687,7 @@ module snoop_chan_logger #( log_string = $sformatf("%0t ns> CR %d RESP: %b, ", $time, no_r_beat, 
cr_beat); $fdisplay(fd, log_string); - if (cr_beat.dataTransfer && !cr_beat.error) begin + if (cr_beat.DataTransfer && !cr_beat.Error) begin while(cd_queues.size() != 0) begin cd_beat = cd_queues.pop_front(); log_string = $sformatf("%0t ns> CD %d DATA: %h, ", diff --git a/src/snoop_intf.sv b/src/snoop_intf.sv index 269a5ac..985907f 100644 --- a/src/snoop_intf.sv +++ b/src/snoop_intf.sv @@ -13,110 +13,122 @@ // Snoop bus interafces interface SNOOP_BUS #( - parameter int unsigned SNOOP_ADDR_WIDTH = 0, - parameter int unsigned SNOOP_DATA_WIDTH = 0 + parameter int unsigned SNOOP_ADDR_WIDTH = 0, + parameter int unsigned SNOOP_DATA_WIDTH = 0 ); - typedef logic [SNOOP_ADDR_WIDTH-1:0] addr_t; - typedef logic [SNOOP_DATA_WIDTH-1:0] data_t; - - addr_t ac_addr; - snoop_pkg::acprot_t ac_prot; - snoop_pkg::acsnoop_t ac_snoop; - logic ac_valid; - logic ac_ready; - - snoop_pkg::crresp_t cr_resp; - logic cr_valid; - logic cr_ready; - - data_t cd_data; - logic cd_last; - logic cd_valid; - logic cd_ready; - - modport Master ( - input ac_addr, ac_prot, ac_snoop, ac_valid, output ac_ready, - input cr_ready, output cr_valid, cr_resp, - input cd_ready, output cd_data, cd_last, cd_valid - ); - - modport Slave ( - output ac_addr, ac_prot, ac_snoop, ac_valid, input ac_ready, - output cr_ready, input cr_valid, cr_resp, - output cd_ready, input cd_data, cd_last, cd_valid - ); - - - modport Monitor ( - input ac_addr, ac_prot, ac_snoop, ac_valid, ac_ready, + typedef logic [SNOOP_ADDR_WIDTH-1:0] addr_t; + typedef logic [SNOOP_DATA_WIDTH-1:0] data_t; + + addr_t ac_addr; + ace_pkg::acprot_t ac_prot; + ace_pkg::acsnoop_t ac_snoop; + logic ac_valid; + logic ac_ready; + + ace_pkg::crresp_t cr_resp; + logic cr_valid; + logic cr_ready; + + data_t cd_data; + logic cd_last; + logic cd_valid; + logic cd_ready; + + modport Master( + input ac_addr, ac_prot, ac_snoop, ac_valid, + output ac_ready, + input cr_ready, + output cr_valid, cr_resp, + input cd_ready, + output cd_data, cd_last, cd_valid + ); + + 
modport Slave( + output ac_addr, ac_prot, ac_snoop, ac_valid, + input ac_ready, + output cr_ready, + input cr_valid, cr_resp, + output cd_ready, + input cd_data, cd_last, cd_valid + ); + + + modport Monitor( + input ac_addr, ac_prot, ac_snoop, ac_valid, ac_ready, cr_ready, cr_valid, cr_resp, cd_ready, cd_data, cd_last, cd_valid - ); + ); endinterface /// A clocked SNOOP interface for use in design verification. interface SNOOP_BUS_DV #( - parameter int unsigned SNOOP_ADDR_WIDTH = 0, - parameter int unsigned SNOOP_DATA_WIDTH = 0 -)( - input clk_i + parameter int unsigned SNOOP_ADDR_WIDTH = 0, + parameter int unsigned SNOOP_DATA_WIDTH = 0 +) ( + input clk_i ); - typedef logic [SNOOP_ADDR_WIDTH-1:0] addr_t; - typedef logic [SNOOP_DATA_WIDTH-1:0] data_t; - - addr_t ac_addr; - snoop_pkg::acprot_t ac_prot; - snoop_pkg::acsnoop_t ac_snoop; - logic ac_valid; - logic ac_ready; - - snoop_pkg::crresp_t cr_resp; - logic cr_valid; - logic cr_ready; - - data_t cd_data; - logic cd_last; - logic cd_valid; - logic cd_ready; - - modport Master ( - input ac_addr, ac_prot, ac_snoop, ac_valid, output ac_ready, - input cr_ready, output cr_valid, cr_resp, - input cd_ready, output cd_data, cd_last, cd_valid - ); - - modport Slave ( - output ac_addr, ac_prot, ac_snoop, ac_valid, input ac_ready, - output cr_ready, input cr_valid, cr_resp, - output cd_ready, input cd_data, cd_last, cd_valid - ); - - - modport Monitor ( - input ac_addr, ac_prot, ac_snoop, ac_valid, ac_ready, + typedef logic [SNOOP_ADDR_WIDTH-1:0] addr_t; + typedef logic [SNOOP_DATA_WIDTH-1:0] data_t; + + addr_t ac_addr; + ace_pkg::acprot_t ac_prot; + ace_pkg::acsnoop_t ac_snoop; + logic ac_valid; + logic ac_ready; + + ace_pkg::crresp_t cr_resp; + logic cr_valid; + logic cr_ready; + + data_t cd_data; + logic cd_last; + logic cd_valid; + logic cd_ready; + + modport Master( + input ac_addr, ac_prot, ac_snoop, ac_valid, + output ac_ready, + input cr_ready, + output cr_valid, cr_resp, + input cd_ready, + output cd_data, cd_last, 
cd_valid + ); + + modport Slave( + output ac_addr, ac_prot, ac_snoop, ac_valid, + input ac_ready, + output cr_ready, + input cr_valid, cr_resp, + output cd_ready, + input cd_data, cd_last, cd_valid + ); + + + modport Monitor( + input ac_addr, ac_prot, ac_snoop, ac_valid, ac_ready, cr_ready, cr_valid, cr_resp, cd_ready, cd_data, cd_last, cd_valid - ); - - // pragma translate_off - `ifndef VERILATOR - // Single-Channel Assertions: Signals including valid must not change between valid and handshake. - // AC - assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> $stable(ac_addr))); - assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> $stable(ac_snoop))); - assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> $stable(ac_prot))); - assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> ac_valid)); - // CR - assert property (@(posedge clk_i) (cr_valid && !cr_ready |=> $stable(cr_resp))); - assert property (@(posedge clk_i) (cr_valid && !cr_ready |=> cr_valid)); - // CD - assert property (@(posedge clk_i) (cd_valid && !cd_ready |=> $stable(cd_data))); - assert property (@(posedge clk_i) (cd_valid && !cd_ready |=> $stable(cd_last))); - assert property (@(posedge clk_i) (cd_valid && !cd_ready |=> cd_valid)); - `endif - // pragma translate_on + ); + + // pragma translate_off +`ifndef VERILATOR + // Single-Channel Assertions: Signals including valid must not change between valid and handshake. 
+ // AC + assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> $stable(ac_addr))); + assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> $stable(ac_snoop))); + assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> $stable(ac_prot))); + assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> ac_valid)); + // CR + assert property (@(posedge clk_i) (cr_valid && !cr_ready |=> $stable(cr_resp))); + assert property (@(posedge clk_i) (cr_valid && !cr_ready |=> cr_valid)); + // CD + assert property (@(posedge clk_i) (cd_valid && !cd_ready |=> $stable(cd_data))); + assert property (@(posedge clk_i) (cd_valid && !cd_ready |=> $stable(cd_last))); + assert property (@(posedge clk_i) (cd_valid && !cd_ready |=> cd_valid)); +`endif + // pragma translate_on endinterface diff --git a/src/snoop_pkg.sv b/src/snoop_pkg.sv deleted file mode 100644 index e1df838..0000000 --- a/src/snoop_pkg.sv +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2014-2018 ETH Zurich, University of Bologna -// Copyright (c) 2022 PlanV GmbH -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - - -//! ACE Package -/// Contains all necessary type definitions, constants, and generally useful functions. 
-package snoop_pkg; - - // CRRESP - typedef struct packed { - logic wasUnique; - logic isShared; - logic passDirty; - logic error; - logic dataTransfer; - } crresp_t; - - /// Support for snoop channels - typedef logic [3:0] acsnoop_t; - typedef logic [2:0] acprot_t; - - // AC snoop encoding - localparam READ_ONCE = 4'b0000; - localparam READ_SHARED = 4'b0001; - localparam READ_CLEAN = 4'b0010; - localparam READ_NOT_SHARED_DIRTY = 4'b0011; - localparam READ_UNIQUE = 4'b0111; - localparam CLEAN_SHARED = 4'b1000; - localparam CLEAN_INVALID = 4'b1001; - localparam CLEAN_UNIQUE = 4'b1011; - localparam MAKE_INVALID = 4'b1101; - localparam DVM_COMPLETE = 4'b1110; - localparam DVM_MESSAGE = 4'b1111; - -endpackage diff --git a/test/tb_ace_ccu_snoop_interconnect.sv b/test/tb_ace_ccu_snoop_interconnect.sv new file mode 100644 index 0000000..946f61f --- /dev/null +++ b/test/tb_ace_ccu_snoop_interconnect.sv @@ -0,0 +1,229 @@ +`include "ace/typedef.svh" +`include "ace/assign.svh" + +`timescale 1ns/1ps + +module tb_ace_ccu_snoop_interconnect import ace_pkg::*; ( + +); + + localparam time CyclTime = 10ns; + localparam time ApplTime = 2ns; + localparam time TestTime = 8ns; + + localparam int unsigned AxiAddrWidth = 64; + localparam int unsigned AxiDataWidth = 64; + + localparam int unsigned TbNumMst = 4; + + typedef snoop_test::snoop_rand_slave #( + .AW ( AxiAddrWidth ), + .DW ( AxiDataWidth ), + .TA ( ApplTime), + .TT ( TestTime), + .RAND_RESP ( '0), + .AC_MIN_WAIT_CYCLES ( 2), + .AC_MAX_WAIT_CYCLES ( 15), + .CR_MIN_WAIT_CYCLES ( 2), + .CR_MAX_WAIT_CYCLES ( 15), + .CD_MIN_WAIT_CYCLES ( 2), + .CD_MAX_WAIT_CYCLES ( 15) + ) snoop_rand_slave_t; + + typedef snoop_test::snoop_rand_master #( + .AW ( AxiAddrWidth ), + .DW ( AxiDataWidth ), + .TA ( ApplTime), + .TT ( TestTime), + .AC_MIN_WAIT_CYCLES ( 2), + .AC_MAX_WAIT_CYCLES ( 15), + .CR_MIN_WAIT_CYCLES ( 2), + .CR_MAX_WAIT_CYCLES ( 15), + .CD_MIN_WAIT_CYCLES ( 2), + .CD_MAX_WAIT_CYCLES ( 15) + ) snoop_rand_master_t; + + typedef logic 
[AxiAddrWidth-1:0] addr_t; + typedef logic [AxiDataWidth-1:0] data_t; + + `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) + `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) + `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) + `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) + `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) + + + logic clk; + logic rst_n; + + task cycle_start; + #(ApplTime); + endtask + + task cycle_end; + @(posedge clk); + endtask + + // snoop structs + snoop_req_t [TbNumMst-1:0] inp_snoop_req; + snoop_resp_t [TbNumMst-1:0] inp_snoop_resp; + snoop_req_t [TbNumMst-1:0] oup_snoop_req; + snoop_resp_t [TbNumMst-1:0] oup_snoop_resp; + + SNOOP_BUS #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) inp_snoop [TbNumMst-1:0] (); + + SNOOP_BUS #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) oup_snoop [TbNumMst-1:0] (); + + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) inp_snoop_dv [TbNumMst-1:0](clk); + + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) oup_snoop_dv [TbNumMst-1:0](clk); + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_snoop + `SNOOP_ASSIGN(inp_snoop[i], inp_snoop_dv[i]) + `SNOOP_ASSIGN(oup_snoop_dv[i], oup_snoop[i]) + `SNOOP_ASSIGN_TO_REQ(inp_snoop_req[i], inp_snoop[i]) + `SNOOP_ASSIGN_FROM_RESP(inp_snoop[i], inp_snoop_resp[i]) + `SNOOP_ASSIGN_FROM_REQ(oup_snoop[i], oup_snoop_req[i]) + `SNOOP_ASSIGN_TO_RESP(oup_snoop_resp[i], oup_snoop[i]) + end + + snoop_rand_master_t snoop_rand_master [TbNumMst]; + for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_snoop_mst + initial begin + snoop_rand_master[i] = new( inp_snoop_dv[i] ); + snoop_rand_master[i].reset(); + @(posedge rst_n); + snoop_rand_master[i].run(1024); + end + end + + snoop_rand_slave_t snoop_rand_slave [TbNumMst]; + for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_snoop_slv + initial begin + snoop_rand_slave[i] = 
new( oup_snoop_dv[i] ); + snoop_rand_slave[i].reset(); + @(posedge rst_n); + snoop_rand_slave[i].run(); + end + end + + initial begin : rst_gen + rst_n = 1'b0; + + repeat (5) @(negedge clk); + + rst_n = 1'b1; + end + + initial begin : clk_gen + clk = 1'b0; + forever #(CyclTime/2) clk = !clk; + end + + logic [TbNumMst-1:0][TbNumMst-1:0] inp_sel; + + logic [TbNumMst-1:0] sel_done; + + initial begin + @(posedge rst_n); + cycle_start(); + while (sel_done != '1) begin + cycle_end(); + cycle_start(); + end + cycle_end(); + $finish; + end + + logic [TbNumMst-1:0] sel_done; + + initial begin + @(posedge rst_n); + cycle_start(); + while (sel_done != '1) begin + cycle_end(); + cycle_start(); + end + cycle_end(); + $finish; + end + + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_sel + + localparam int unsigned idx = i; + logic [TbNumMst-1:0] temp_inp_sel; + + initial begin + + sel_done[i] = 1'b0; + + @(posedge rst_n); + + + repeat (64) begin + // Randomize the temp variable with the constraint + std::randomize(temp_inp_sel) with { + temp_inp_sel != '0; + temp_inp_sel[idx] == 1'b0; + }; + // Assign the randomized value to inp_sel[i] + inp_sel[i] <= #(ApplTime) temp_inp_sel; + + cycle_start(); + while (!(inp_snoop_req[i].ac_valid && inp_snoop_resp[i].ac_ready)) begin + + cycle_end(); + cycle_start(); + end + cycle_end(); + + end + sel_done[i] = 1'b1; + end + + end + + logic lup_valid, lup_ready; + + ace_ccu_snoop_interconnect #( + .NumInp (TbNumMst), + .NumOup (TbNumMst), + .ConfCheck (1), + .NumLup (1), + .AddrBase (4), + .AddrLength (16), + .ac_chan_t (snoop_ac_t), + .cr_chan_t (snoop_cr_t), + .cd_chan_t (snoop_cd_t), + .snoop_req_t (snoop_req_t), + .snoop_resp_t (snoop_resp_t) + ) i_dut ( + .clk_i (clk), + .rst_ni (rst_n), + .inp_sel_i (inp_sel), + .inp_req_i (inp_snoop_req), + .inp_resp_o (inp_snoop_resp), + .oup_req_o (oup_snoop_req), + .oup_resp_i (oup_snoop_resp), + .lup_valid_o (lup_valid), + .lup_ready_i (lup_ready), + .lup_addr_o (), + .lup_valid_i (lup_valid), 
+ .lup_ready_o (lup_ready), + .lup_clr_o () + ); + +endmodule diff --git a/test/tb_ace_ccu_top.sv b/test/tb_ace_ccu_top.sv index ea2c27d..f89a9cd 100644 --- a/test/tb_ace_ccu_top.sv +++ b/test/tb_ace_ccu_top.sv @@ -1,5 +1,4 @@ -// Copyright (c) 2019 ETH Zurich and University of Bologna. -// Copyright (c) 2022 PlanV GmbH +// Copyright (c) 2025 ETH Zurich, University of Bologna // // Copyright and related rights are licensed under the Solderpad Hardware // License, Version 0.51 (the "License"); you may not use this file except in @@ -9,528 +8,338 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -// -// Authors: -// - Florian Zaruba -// - Andreas Kurth -// Directed Random Verification Testbench for `axi_xbar`: The crossbar is instantiated with -// a number of random axi master and slave modules. Each random master executes a fixed number of -// writes and reads over the whole addess map. All masters simultaneously issue transactions -// through the crossbar, thereby saturating it. A monitor, which snoops the transactions of each -// master and slave port and models the crossbar with a network of FIFOs, checks whether each -// transaction follows the expected route. +// Directed random verification testbench for `ace_ccu_top`. 
`include "ace/typedef.svh" `include "ace/assign.svh" +`include "ace/domain.svh" module tb_ace_ccu_top #( - parameter bit TbEnAtop = 1'b1, // enable atomic operations (ATOPs) - parameter bit TbEnExcl = 1'b0, // enable exclusive accesses - parameter bit TbUniqueIds = 1'b0, // restrict to only unique IDs - parameter int unsigned TbNumMst = 32'd4, // how many AXI masters there are - parameter int unsigned TbNumSlv = 32'd1 // how many AXI slaves there are + /// Address space + parameter int unsigned AddrWidth = 0, + /// Memory bus data width + parameter int unsigned DataWidth = 0, + /// Cache word width + parameter int unsigned WordWidth = 0, + /// Words per cache line + parameter int unsigned CachelineWords = 0, + /// Cache ways + parameter int unsigned Ways = 0, + /// Cache sets + parameter int unsigned Sets = 0, + /// Number of cached masters + parameter int unsigned TbNumMst = 0, + /// Number of master groups (a group share the snooping FSM) + parameter int unsigned NoMstGroups = 1, + /// Directory for files + parameter string MemDir = "" ); - // Random master no Transactions - localparam int unsigned NoWrites = 80; // How many writes per master - localparam int unsigned NoReads = 80; // How many reads per master - // timing parameters - localparam time CyclTime = 10ns; - localparam time ApplTime = 2ns; - localparam time TestTime = 8ns; - - // axi configuration - localparam int unsigned AxiIdWidthMasters = 4; - localparam int unsigned AxiIdUsed = 3; // Has to be <= AxiIdWidthMasters - localparam int unsigned AxiIdWidthSlaves = AxiIdWidthMasters + $clog2(TbNumMst)+$clog2(TbNumMst+1); - localparam int unsigned AxiAddrWidth = 32; // Axi Address Width - localparam int unsigned AxiDataWidth = 64; // Axi Data Width - localparam int unsigned AxiStrbWidth = AxiDataWidth / 8; - localparam int unsigned AxiUserWidth = 5; - - // in the bench can change this variables which are set here freely - localparam ace_pkg::ccu_cfg_t ccu_cfg = '{ - NoSlvPorts: TbNumMst, - MaxMstTrans: 
10, - MaxSlvTrans: 6, - FallThrough: 1'b1, - LatencyMode: ace_pkg::NO_LATENCY, - AxiIdWidthSlvPorts: AxiIdWidthMasters, - AxiIdUsedSlvPorts: AxiIdUsed, - UniqueIds: TbUniqueIds, - AxiAddrWidth: AxiAddrWidth, - AxiDataWidth: AxiDataWidth - }; - - - typedef logic [AxiIdWidthMasters-1:0] id_mst_t; - typedef logic [AxiIdWidthSlaves-1:0] id_slv_t; - typedef logic [AxiAddrWidth-1:0] addr_t; - typedef logic [AxiDataWidth-1:0] data_t; - typedef logic [AxiStrbWidth-1:0] strb_t; - typedef logic [AxiUserWidth-1:0] user_t; - - `ACE_TYPEDEF_AW_CHAN_T(aw_chan_mst_t, addr_t, id_mst_t, user_t) - `AXI_TYPEDEF_AW_CHAN_T(aw_chan_slv_t, addr_t, id_slv_t, user_t) - `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(b_chan_mst_t, id_mst_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(b_chan_slv_t, id_slv_t, user_t) - - `ACE_TYPEDEF_AR_CHAN_T(ar_chan_mst_t, addr_t, id_mst_t, user_t) - `AXI_TYPEDEF_AR_CHAN_T(ar_chan_slv_t, addr_t, id_slv_t, user_t) - `ACE_TYPEDEF_R_CHAN_T(r_chan_mst_t, data_t, id_mst_t, user_t) - `AXI_TYPEDEF_R_CHAN_T(r_chan_slv_t, data_t, id_slv_t, user_t) - - `ACE_TYPEDEF_REQ_T(mst_req_t, aw_chan_mst_t, w_chan_t, ar_chan_mst_t) - `ACE_TYPEDEF_RESP_T(mst_resp_t, b_chan_mst_t, r_chan_mst_t) - `AXI_TYPEDEF_REQ_T(slv_req_t, aw_chan_slv_t, w_chan_t, ar_chan_slv_t) - `AXI_TYPEDEF_RESP_T(slv_resp_t, b_chan_slv_t, r_chan_slv_t) - - `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) - `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) - `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) - `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) - `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) - - - typedef ace_test::ace_rand_master #( - // AXI interface parameters - .AW ( AxiAddrWidth ), - .DW ( AxiDataWidth ), - .IW ( AxiIdWidthMasters ), - .UW ( AxiUserWidth ), - // Stimuli application and test time - .TA ( ApplTime ), - .TT ( TestTime ), - // Maximum number of read and write transactions in flight - .MAX_READ_TXNS ( 20 ), - .MAX_WRITE_TXNS ( 20 ), - .AXI_EXCLS ( TbEnExcl ), - 
.AXI_ATOPS ( TbEnAtop ), - .UNIQUE_IDS ( TbUniqueIds ) - ) ace_rand_master_t; - typedef axi_test::axi_rand_slave #( - // AXI interface parameters - .AW ( AxiAddrWidth ), - .DW ( AxiDataWidth ), - .IW ( AxiIdWidthSlaves ), - .UW ( AxiUserWidth ), - // Stimuli application and test time - .TA ( ApplTime ), - .TT ( TestTime ) - ) axi_rand_slave_t; - - typedef snoop_test::snoop_rand_slave #( - // ADDR and Data interface parameters - .AW ( AxiAddrWidth ), - .DW ( AxiDataWidth ), - // Stimuli application and test time - .TA ( ApplTime), - .TT ( TestTime), - .RAND_RESP ( '0), - // Upper and lower bounds on wait cycles on Ax, W, and resp (R and B) channels - .AC_MIN_WAIT_CYCLES ( 2), - .AC_MAX_WAIT_CYCLES ( 15), - .CR_MIN_WAIT_CYCLES ( 2), - .CR_MAX_WAIT_CYCLES ( 15), - .CD_MIN_WAIT_CYCLES ( 2), - .CD_MAX_WAIT_CYCLES ( 15) - )snoop_rand_slave_t; - // ------------- - // DUT signals - // ------------- - logic clk; - // DUT signals - logic rst_n; - logic [TbNumMst-1:0] end_of_sim; - - // master structs - mst_req_t [TbNumMst-1:0] masters_req; - mst_resp_t [TbNumMst-1:0] masters_resp; - - // slave structs - slv_req_t [TbNumSlv-1:0] slaves_req; - slv_resp_t [TbNumSlv-1:0] slaves_resp; - - // snoop structs - snoop_req_t [TbNumMst-1:0] snoop_req; - snoop_resp_t [TbNumMst-1:0] snoop_resp; - - - // ------------------------------- - // AXI Interfaces - // ------------------------------- - ACE_BUS #( - .AXI_ADDR_WIDTH ( AxiAddrWidth ), - .AXI_DATA_WIDTH ( AxiDataWidth ), - .AXI_ID_WIDTH ( AxiIdWidthMasters ), - .AXI_USER_WIDTH ( AxiUserWidth ) - ) master [TbNumMst-1:0] (); - ACE_BUS_DV #( - .AXI_ADDR_WIDTH ( AxiAddrWidth ), - .AXI_DATA_WIDTH ( AxiDataWidth ), - .AXI_ID_WIDTH ( AxiIdWidthMasters ), - .AXI_USER_WIDTH ( AxiUserWidth ) - ) master_dv [TbNumMst-1:0] (clk); - ACE_BUS_DV #( - .AXI_ADDR_WIDTH ( AxiAddrWidth ), - .AXI_DATA_WIDTH ( AxiDataWidth ), - .AXI_ID_WIDTH ( AxiIdWidthMasters ), - .AXI_USER_WIDTH ( AxiUserWidth ) - ) master_monitor_dv [TbNumMst-1:0] (clk); - for (genvar i 
= 0; i < TbNumMst; i++) begin : gen_conn_dv_masters - `ACE_ASSIGN (master[i], master_dv[i]) - `ACE_ASSIGN_TO_REQ(masters_req[i], master[i]) - `ACE_ASSIGN_TO_RESP(masters_resp[i], master[i]) - end - - AXI_BUS #( - .AXI_ADDR_WIDTH ( AxiAddrWidth ), - .AXI_DATA_WIDTH ( AxiDataWidth ), - .AXI_ID_WIDTH ( AxiIdWidthSlaves ), - .AXI_USER_WIDTH ( AxiUserWidth ) - ) slave [TbNumSlv-1:0] (); - AXI_BUS_DV #( - .AXI_ADDR_WIDTH ( AxiAddrWidth ), - .AXI_DATA_WIDTH ( AxiDataWidth ), - .AXI_ID_WIDTH ( AxiIdWidthSlaves ), - .AXI_USER_WIDTH ( AxiUserWidth ) - ) slave_dv [TbNumSlv-1:0](clk); - AXI_BUS_DV #( - .AXI_ADDR_WIDTH ( AxiAddrWidth ), - .AXI_DATA_WIDTH ( AxiDataWidth ), - .AXI_ID_WIDTH ( AxiIdWidthSlaves ), - .AXI_USER_WIDTH ( AxiUserWidth ) - ) slave_monitor_dv [TbNumSlv-1:0](clk); - for (genvar i = 0; i < TbNumSlv; i++) begin : gen_conn_dv_slaves - `AXI_ASSIGN(slave_dv[i], slave[i]) - `AXI_ASSIGN_TO_REQ(slaves_req[i], slave[i]) - `AXI_ASSIGN_TO_RESP(slaves_resp[i], slave[i]) - end - - SNOOP_BUS #( - .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), - .SNOOP_DATA_WIDTH ( AxiDataWidth ) - ) snoop [TbNumMst-1:0] (); - SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), - .SNOOP_DATA_WIDTH ( AxiDataWidth ) - ) snoop_dv [TbNumMst-1:0](clk); - SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), - .SNOOP_DATA_WIDTH ( AxiDataWidth ) - ) snoop_monitor_dv [TbNumMst-1:0](clk); - for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_snoop - `SNOOP_ASSIGN(snoop_dv[i], snoop[i]) - `SNOOP_ASSIGN_TO_REQ(snoop_req[i], snoop[i]) - `SNOOP_ASSIGN_TO_RESP(snoop_resp[i], snoop[i]) - end - - // ------------------------------- - // AXI and SNOOP Rand Masters and Slaves - // ------------------------------- - // Masters control simulation run time - ace_rand_master_t ace_rand_master [TbNumMst]; - for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_master + + // timing parameters + localparam time CyclTime = 10ns; + localparam time ApplTime = 2ns; + localparam time TestTime = 8ns; + + localparam 
CachelineBits = CachelineWords * WordWidth; + + // How many cached masters per group + localparam MstPerGroup = TbNumMst / NoMstGroups; + localparam NoGroups = NoMstGroups; + + // axi configuration + localparam int unsigned AxiIdWidthMasters = 4; + localparam int unsigned AxiIdUsed = 3; + localparam int unsigned AxiAddrWidth = AddrWidth; + localparam int unsigned AxiDataWidth = DataWidth; + localparam int unsigned AxiStrbWidth = AxiDataWidth / 8; + localparam int unsigned AxiUserWidth = 5; + localparam int unsigned WriteBackLen = CachelineWords - 1; + localparam int unsigned WriteBackSize = $clog2(DataWidth / 8); + + localparam ace_ccu_pkg::ace_ccu_user_cfg_t CcuUserCfg = '{ + SlvPorts : TbNumMst, + MaxTransactions : 8, + ShareableWFifoDepth : 4, + ReplayEn : 0, + NLineWidth : AxiAddrWidth - $clog2(CachelineBits / 8), + AxiUniqueIds : 0, + AxiIdLookupBits : 3, + AxiAddrWidth : AxiAddrWidth, + AxiDataWidth : AxiDataWidth, + AxiUserWidth : AxiUserWidth, + AxiSlvIdWidth : AxiIdWidthMasters, + CachelineWidth : CachelineBits, + CutSlvReq : 1, + CutSlvResp : 1, + CutMstReq : 1, + CutMstResp : 1, + CutSnoopReq : 1, + CutSnoopResp : 1 + }; + + localparam ace_ccu_pkg::ace_ccu_cfg_t CcuCfg = ace_ccu_pkg::ace_ccu_build_cfg(CcuUserCfg); + localparam int unsigned AxiIdWidthSlave = CcuCfg.AxiMstIdWidth; + + typedef logic [AxiIdWidthMasters-1:0] id_t; + typedef logic [AxiIdWidthSlave-1:0] id_slv_t; + typedef logic [AxiAddrWidth-1:0] addr_t; + typedef logic [AxiDataWidth-1:0] data_t; + typedef logic [AxiStrbWidth-1:0] strb_t; + typedef logic [AxiUserWidth-1:0] user_t; + + `ACE_TYPEDEF_AW_CHAN_T(ace_aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T (ace_w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T (ace_b_chan_t, id_t, user_t ) + `ACE_TYPEDEF_AR_CHAN_T(ace_ar_chan_t, addr_t, id_t, user_t ) + `ACE_TYPEDEF_R_CHAN_T (ace_r_chan_t, data_t, id_t, user_t ) + `ACE_TYPEDEF_REQ_T (ace_req_t, ace_aw_chan_t, ace_w_chan_t, ace_ar_chan_t) + `ACE_TYPEDEF_RESP_T (ace_resp_t, 
ace_b_chan_t, ace_r_chan_t) + + `AXI_TYPEDEF_AW_CHAN_T(axi_aw_chan_t, addr_t, id_slv_t, user_t) + `AXI_TYPEDEF_W_CHAN_T (axi_w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T (axi_b_chan_t, id_slv_t, user_t ) + `AXI_TYPEDEF_AR_CHAN_T(axi_ar_chan_t, addr_t, id_slv_t, user_t) + `AXI_TYPEDEF_R_CHAN_T (axi_r_chan_t, data_t, id_slv_t, user_t) + `AXI_TYPEDEF_REQ_T (axi_req_t, axi_aw_chan_t, axi_w_chan_t, axi_ar_chan_t) + `AXI_TYPEDEF_RESP_T (axi_resp_t, axi_b_chan_t, axi_r_chan_t) + + `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) + `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) + `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) + `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) + `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) + + logic clk, rst_n; + logic [TbNumMst-1:0] end_of_sim = '0; + + // Defines domain_mask_t and domain_rule_t + `DOMAIN_TYPEDEF_ALL(TbNumMst, mst_bv_t, domain_rule_t) + + domain_rule_t [TbNumMst-1:0] domain_rule; initial begin - ace_rand_master[i] = new( master_dv[i] ); - end_of_sim[i] <= 1'b0; - ace_rand_master[i].add_memory_region(32'h0000_0000, 32'h0000_3000, - axi_pkg::DEVICE_NONBUFFERABLE); - ace_rand_master[i].reset(); - @(posedge rst_n); - ace_rand_master[i].run(NoReads, NoWrites); - end_of_sim[i] <= 1'b1; + for (int i = 0; i < TbNumMst; i++) begin + domain_rule[i].initiator = 1 << i; + domain_rule[i].inner = ~(1 << i); + domain_rule[i].outer = ~(1 << i); + end end - end - snoop_rand_slave_t snoop_rand_slave [TbNumMst]; - for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_snoop - initial begin - snoop_rand_slave[i] = new( snoop_dv[i] ); - snoop_rand_slave[i].reset(); - @(posedge rst_n); - snoop_rand_slave[i].run(); + + // Cache data memory initial state + string data_mem_file_template = {MemDir, "/data_mem_%0d.mem"}; + // Cache tag memory initial state + string tag_mem_file_template = {MemDir, "/tag_mem_%0d.mem"}; + // Cache line status initial state + string status_file_template = {MemDir, "/state_%0d.mem"}; + // Cache transactions + 
string txn_file_template = {MemDir, "/txns_%0d.txt"}; + // Initial main memory state + string init_main_mem = {MemDir, "/main_mem.mem"}; + // Logged cache state changes + string diff_file_template = {MemDir, "/cache_diff_%0d.txt"}; + string diff_main_mem = {MemDir, "/main_mem_diff.txt"}; + + ACE_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiIdWidthMasters ) + ) ace_dv_intf [TbNumMst-1:0] (clk); + + ACE_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiIdWidthMasters ) + ) ace_intf [TbNumMst-1:0](); + + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop_dv_intf [TbNumMst-1:0](clk); + + SNOOP_BUS #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop_intf [TbNumMst-1:0](); + + AXI_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlave ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) axi_dv_intf (clk); + + AXI_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlave ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) axi_intf(); + + MONITOR_BUS_DV #( + .ADDR_WIDTH (AxiAddrWidth), + .DATA_WIDTH ( AxiDataWidth), + .ID_WIDTH ( AxiIdWidthSlave), + .USER_WIDTH (AxiUserWidth) + ) sim_mem_mon_intf (clk); + + // Interface with clock for generating delays + CLK_IF clk_if (clk); + + typedef virtual ACE_BUS_DV #( + .AXI_ADDR_WIDTH (AxiAddrWidth), + .AXI_DATA_WIDTH (AxiDataWidth), + .AXI_ID_WIDTH (AxiIdWidthMasters), + .AXI_USER_WIDTH (AxiIdWidthMasters) + ) ace_bus_v_t; + + typedef virtual SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH (AxiAddrWidth), + .SNOOP_DATA_WIDTH (AxiDataWidth) + ) snoop_bus_v_t; + + typedef virtual CLK_IF clk_if_v_t; + + typedef virtual MONITOR_BUS_DV #( + .ADDR_WIDTH (AxiAddrWidth), + .DATA_WIDTH ( 
AxiDataWidth), + .ID_WIDTH ( AxiIdWidthSlave), + .USER_WIDTH (AxiUserWidth) + ) mon_bus_t; + + + // Clock generator + clk_rst_gen #( + .ClkPeriod ( CyclTime ), + .RstClkCycles ( 5 ) + ) i_clk_gen ( + .clk_o (clk), + .rst_no (rst_n) + ); + + cache_test_pkg::mem_logger #( + .AW(AxiAddrWidth), + .DW(AxiDataWidth), + .IW(AxiIdWidthSlave), + .UW(AxiUserWidth), + .TA(ApplTime), + .TT(TestTime), + .mon_bus_t(mon_bus_t) + ) axi_mem_logger; + + cache_test_pkg::cache_top_agent #( + .AW (AxiAddrWidth), + .DW (AxiDataWidth), + .AC_AW (AxiAddrWidth), + .CD_DW (AxiDataWidth), + .IW (AxiIdWidthMasters), + .UW (AxiUserWidth), + .TA (ApplTime), + .TT (TestTime), + .CACHELINE_WORDS (CachelineWords), + .WORD_WIDTH (WordWidth), + .WAYS (Ways), + .SETS (Sets), + .ace_bus_t (ace_bus_v_t), + .snoop_bus_t (snoop_bus_v_t), + .clk_if_t (clk_if_v_t) + ) ace_master [TbNumMst-1:0]; + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_cache_agents + `ACE_ASSIGN(ace_intf[i], ace_dv_intf[i]); + end + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_snoop + `SNOOP_ASSIGN(snoop_dv_intf[i], snoop_intf[i]) end - end + for (genvar i = 0; i < TbNumMst; i++) begin : init_cache_agents + initial begin + string data_mem_file, tag_mem_file, status_file, txn_file; + string diff_file; + $sformat(data_mem_file, data_mem_file_template, i); + $sformat(tag_mem_file, tag_mem_file_template, i); + $sformat(status_file, status_file_template, i); + $sformat(txn_file, txn_file_template, i); + $sformat(diff_file, diff_file_template, i); + ace_master[i] = new( + ace_dv_intf[i], + snoop_dv_intf[i], + clk_if, + data_mem_file, + tag_mem_file, + status_file, + txn_file, + diff_file, + i + ); + ace_master[i].reset(); + @(posedge rst_n); + ace_master[i].run(); + @(posedge clk); + @(posedge clk); + @(posedge clk); + end_of_sim[i] = '1; + end + end + + always @(*) begin + if (&end_of_sim) $finish(); + end - axi_rand_slave_t axi_rand_slave [1]; - for (genvar i = 0; i < TbNumSlv; i++) begin : gen_rand_slave 
initial begin - axi_rand_slave[i] = new( slave_dv[i] ); - axi_rand_slave[i].reset(); - @(posedge rst_n); - axi_rand_slave[i].run(); + axi_mem_logger = new( + sim_mem_mon_intf, + diff_main_mem + ); + @(posedge rst_n); + axi_mem_logger.run(); end - end - - - - - initial begin : proc_monitor - static tb_ace_ccu_pkg::ace_ccu_monitor #( - .AxiAddrWidth ( AxiAddrWidth ), - .AxiDataWidth ( AxiDataWidth ), - .AxiIdWidthMasters ( AxiIdWidthMasters ), - .AxiIdWidthSlaves ( AxiIdWidthSlaves ), - .AxiUserWidth ( AxiUserWidth ), - .NoMasters ( TbNumMst ), - .NoSlaves ( TbNumSlv ), - .TimeTest ( TestTime ) - ) monitor = new( master_monitor_dv, slave_monitor_dv, snoop_monitor_dv ); - fork - monitor.run(); - do begin - #TestTime; - if(end_of_sim == '1) begin - monitor.print_result(); - $stop(); - end - @(posedge clk); - end while (1'b1); - join - end - - //----------------------------------- - // Clock generator - //----------------------------------- - clk_rst_gen #( - .ClkPeriod ( CyclTime ), - .RstClkCycles ( 5 ) - ) i_clk_gen ( - .clk_o (clk), - .rst_no(rst_n) - ); - - //----------------------------------- - // DUT - //----------------------------------- - ace_ccu_top_intf #( - .AXI_USER_WIDTH ( AxiUserWidth ), - .Cfg ( ccu_cfg ) - ) i_ccu_dut ( - .clk_i ( clk ), - .rst_ni ( rst_n ), - .test_i ( 1'b0 ), - .snoop_ports ( snoop ), - .slv_ports ( master ), - .mst_ports ( slave[0] ) - ); - - // logger for master modules - for (genvar i = 0; i < TbNumMst; i++) begin : gen_master_logger - ace_chan_logger #( - .TestTime ( TestTime ), // Time after clock, where sampling happens - .LoggerName( $sformatf("axi_logger_master_%0d", i)), - .aw_chan_t ( aw_chan_mst_t ), // axi AW type - .w_chan_t ( w_chan_t ), // axi W type - .b_chan_t ( b_chan_mst_t ), // axi B type - .ar_chan_t ( ar_chan_mst_t ), // axi AR type - .r_chan_t ( r_chan_mst_t ) // axi R type - ) i_mst_channel_logger ( - .clk_i ( clk ), // Clock - .rst_ni ( rst_n ), // Asynchronous reset active low, when `1'b0` no sampling - 
.end_sim_i ( &end_of_sim ), - // AW channel - .aw_chan_i ( masters_req[i].aw ), - .aw_valid_i ( masters_req[i].aw_valid ), - .aw_ready_i ( masters_resp[i].aw_ready ), - // W channel - .w_chan_i ( masters_req[i].w ), - .w_valid_i ( masters_req[i].w_valid ), - .w_ready_i ( masters_resp[i].w_ready ), - // B channel - .b_chan_i ( masters_resp[i].b ), - .b_valid_i ( masters_resp[i].b_valid ), - .b_ready_i ( masters_req[i].b_ready ), - // AR channel - .ar_chan_i ( masters_req[i].ar ), - .ar_valid_i ( masters_req[i].ar_valid ), - .ar_ready_i ( masters_resp[i].ar_ready ), - // R channel - .r_chan_i ( masters_resp[i].r ), - .r_valid_i ( masters_resp[i].r_valid ), - .r_ready_i ( masters_req[i].r_ready ) - ); - end - // logger for slave modules - for (genvar i = 0; i < 1; i++) begin : gen_slave_logger - axi_chan_logger #( - .TestTime ( TestTime ), // Time after clock, where sampling happens - .LoggerName( $sformatf("axi_logger_slave_%0d",i)), - .aw_chan_t ( aw_chan_slv_t ), // axi AW type - .w_chan_t ( w_chan_t ), // axi W type - .b_chan_t ( b_chan_slv_t ), // axi B type - .ar_chan_t ( ar_chan_slv_t ), // axi AR type - .r_chan_t ( r_chan_slv_t ) // axi R type - ) i_slv_channel_logger ( - .clk_i ( clk ), // Clock - .rst_ni ( rst_n ), // Asynchronous reset active low, when `1'b0` no sampling - .end_sim_i ( &end_of_sim ), - // AW channel - .aw_chan_i ( slaves_req[i].aw ), - .aw_valid_i ( slaves_req[i].aw_valid ), - .aw_ready_i ( slaves_resp[i].aw_ready ), - // W channel - .w_chan_i ( slaves_req[i].w ), - .w_valid_i ( slaves_req[i].w_valid ), - .w_ready_i ( slaves_resp[i].w_ready ), - // B channel - .b_chan_i ( slaves_resp[i].b ), - .b_valid_i ( slaves_resp[i].b_valid ), - .b_ready_i ( slaves_req[i].b_ready ), - // AR channel - .ar_chan_i ( slaves_req[i].ar ), - .ar_valid_i ( slaves_req[i].ar_valid ), - .ar_ready_i ( slaves_resp[i].ar_ready ), - // R channel - .r_chan_i ( slaves_resp[i].r ), - .r_valid_i ( slaves_resp[i].r_valid ), - .r_ready_i ( slaves_req[i].r_ready ) + + + // 
AXI Simulation Memory + axi_sim_mem_intf #( + // AXI interface parameters + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlave ), + .AXI_USER_WIDTH ( AxiUserWidth ), + .APPL_DELAY ( ApplTime ), + .ACQ_DELAY ( TestTime ) + ) axi_mem ( + .clk_i(clk), + .rst_ni(rst_n), + .axi_slv(axi_intf), + .mon_w_valid_o(sim_mem_mon_intf.w_valid), + .mon_w_addr_o(sim_mem_mon_intf.w_addr), + .mon_w_data_o(sim_mem_mon_intf.w_data), + .mon_w_id_o(sim_mem_mon_intf.w_id), + .mon_w_user_o(sim_mem_mon_intf.w_user), + .mon_w_beat_count_o(sim_mem_mon_intf.w_beat_count), + .mon_w_last_o(sim_mem_mon_intf.w_last), + .mon_r_valid_o(sim_mem_mon_intf.r_valid), + .mon_r_addr_o(sim_mem_mon_intf.r_addr), + .mon_r_data_o(sim_mem_mon_intf.r_data), + .mon_r_id_o(sim_mem_mon_intf.r_id), + .mon_r_user_o(sim_mem_mon_intf.r_user), + .mon_r_beat_count_o(sim_mem_mon_intf.r_beat_count), + .mon_r_last_o(sim_mem_mon_intf.r_last) ); - end - -// logger for snoop modules - for (genvar i = 0; i < TbNumMst; i++) begin : gen_snoop_logger - snoop_chan_logger #( - .TestTime ( TestTime ), // Time after clock, where sampling happens - .LoggerName( $sformatf("axi_logger_snoop_%0d",i)), - .ac_chan_t ( snoop_ac_t ), // AW type - .cr_chan_t ( snoop_cr_t ), // CR type - .cd_chan_t ( snoop_cd_t ) // CD type - ) i_snoop_channel_logger ( - .clk_i ( clk ), // Clock - .rst_ni ( rst_n ), // Asynchronous reset active low, when `1'b0` no sampling - .end_sim_i ( &end_of_sim ), - // AC channel - .ac_chan_i ( snoop_req[i].ac ), - .ac_valid_i ( snoop_req[i].ac_valid ), - .ac_ready_i ( snoop_resp[i].ac_ready ), - // CR channel - .cr_chan_i ( snoop_resp[i].cr_resp ), - .cr_valid_i ( snoop_resp[i].cr_valid), - .cr_ready_i ( snoop_req[i].cr_ready ), - // CR channel - .cd_chan_i ( snoop_resp[i].cd ), - .cd_valid_i ( snoop_resp[i].cd_valid), - .cd_ready_i ( snoop_req[i].cd_ready ) + + initial begin + $readmemh(init_main_mem, axi_mem.i_sim_mem.mem); + end + + ace_ccu_top_intf #( + .CCU_CFG 
(CcuCfg) + ) ccu ( + .clk_i (clk), + .rst_ni (rst_n), + .domain_rule_i (domain_rule), + .slv (ace_intf), + .snoop (snoop_intf), + .mst (axi_intf) ); - end - - for (genvar i = 0; i < TbNumMst; i++) begin : gen_connect_master_monitor - assign master_monitor_dv[i].aw_id = master[i].aw_id ; - assign master_monitor_dv[i].aw_addr = master[i].aw_addr ; - assign master_monitor_dv[i].aw_len = master[i].aw_len ; - assign master_monitor_dv[i].aw_size = master[i].aw_size ; - assign master_monitor_dv[i].aw_burst = master[i].aw_burst ; - assign master_monitor_dv[i].aw_lock = master[i].aw_lock ; - assign master_monitor_dv[i].aw_cache = master[i].aw_cache ; - assign master_monitor_dv[i].aw_prot = master[i].aw_prot ; - assign master_monitor_dv[i].aw_qos = master[i].aw_qos ; - assign master_monitor_dv[i].aw_region = master[i].aw_region; - assign master_monitor_dv[i].aw_atop = master[i].aw_atop ; - assign master_monitor_dv[i].aw_user = master[i].aw_user ; - assign master_monitor_dv[i].aw_valid = master[i].aw_valid ; - assign master_monitor_dv[i].aw_ready = master[i].aw_ready ; - assign master_monitor_dv[i].aw_snoop = master[i].aw_snoop; - assign master_monitor_dv[i].aw_bar = master[i].aw_bar ; - assign master_monitor_dv[i].aw_domain = master[i].aw_domain ; - assign master_monitor_dv[i].aw_awunique = master[i].aw_awunique ; - assign master_monitor_dv[i].w_data = master[i].w_data ; - assign master_monitor_dv[i].w_strb = master[i].w_strb ; - assign master_monitor_dv[i].w_last = master[i].w_last ; - assign master_monitor_dv[i].w_user = master[i].w_user ; - assign master_monitor_dv[i].w_valid = master[i].w_valid ; - assign master_monitor_dv[i].w_ready = master[i].w_ready ; - assign master_monitor_dv[i].b_id = master[i].b_id ; - assign master_monitor_dv[i].b_resp = master[i].b_resp ; - assign master_monitor_dv[i].b_user = master[i].b_user ; - assign master_monitor_dv[i].b_valid = master[i].b_valid ; - assign master_monitor_dv[i].b_ready = master[i].b_ready ; - assign 
master_monitor_dv[i].ar_id = master[i].ar_id ; - assign master_monitor_dv[i].ar_addr = master[i].ar_addr ; - assign master_monitor_dv[i].ar_len = master[i].ar_len ; - assign master_monitor_dv[i].ar_size = master[i].ar_size ; - assign master_monitor_dv[i].ar_burst = master[i].ar_burst ; - assign master_monitor_dv[i].ar_lock = master[i].ar_lock ; - assign master_monitor_dv[i].ar_cache = master[i].ar_cache ; - assign master_monitor_dv[i].ar_prot = master[i].ar_prot ; - assign master_monitor_dv[i].ar_qos = master[i].ar_qos ; - assign master_monitor_dv[i].ar_region = master[i].ar_region; - assign master_monitor_dv[i].ar_user = master[i].ar_user ; - assign master_monitor_dv[i].ar_valid = master[i].ar_valid ; - assign master_monitor_dv[i].ar_ready = master[i].ar_ready ; - assign master_monitor_dv[i].ar_snoop = master[i].ar_snoop ; - assign master_monitor_dv[i].ar_bar = master[i].ar_bar ; - assign master_monitor_dv[i].ar_domain = master[i].ar_domain ; - assign master_monitor_dv[i].r_id = master[i].r_id ; - assign master_monitor_dv[i].r_data = master[i].r_data ; - assign master_monitor_dv[i].r_resp = master[i].r_resp ; - assign master_monitor_dv[i].r_last = master[i].r_last ; - assign master_monitor_dv[i].r_user = master[i].r_user ; - assign master_monitor_dv[i].r_valid = master[i].r_valid ; - assign master_monitor_dv[i].r_ready = master[i].r_ready ; - end - for (genvar i = 0; i < TbNumSlv; i++) begin : gen_connect_slave_monitor - assign slave_monitor_dv[i].aw_id = slave[i].aw_id ; - assign slave_monitor_dv[i].aw_addr = slave[i].aw_addr ; - assign slave_monitor_dv[i].aw_len = slave[i].aw_len ; - assign slave_monitor_dv[i].aw_size = slave[i].aw_size ; - assign slave_monitor_dv[i].aw_burst = slave[i].aw_burst ; - assign slave_monitor_dv[i].aw_lock = slave[i].aw_lock ; - assign slave_monitor_dv[i].aw_cache = slave[i].aw_cache ; - assign slave_monitor_dv[i].aw_prot = slave[i].aw_prot ; - assign slave_monitor_dv[i].aw_qos = slave[i].aw_qos ; - assign 
slave_monitor_dv[i].aw_region = slave[i].aw_region; - assign slave_monitor_dv[i].aw_atop = slave[i].aw_atop ; - assign slave_monitor_dv[i].aw_user = slave[i].aw_user ; - assign slave_monitor_dv[i].aw_valid = slave[i].aw_valid ; - assign slave_monitor_dv[i].aw_ready = slave[i].aw_ready ; - assign slave_monitor_dv[i].w_data = slave[i].w_data ; - assign slave_monitor_dv[i].w_strb = slave[i].w_strb ; - assign slave_monitor_dv[i].w_last = slave[i].w_last ; - assign slave_monitor_dv[i].w_user = slave[i].w_user ; - assign slave_monitor_dv[i].w_valid = slave[i].w_valid ; - assign slave_monitor_dv[i].w_ready = slave[i].w_ready ; - assign slave_monitor_dv[i].b_id = slave[i].b_id ; - assign slave_monitor_dv[i].b_resp = slave[i].b_resp ; - assign slave_monitor_dv[i].b_user = slave[i].b_user ; - assign slave_monitor_dv[i].b_valid = slave[i].b_valid ; - assign slave_monitor_dv[i].b_ready = slave[i].b_ready ; - assign slave_monitor_dv[i].ar_id = slave[i].ar_id ; - assign slave_monitor_dv[i].ar_addr = slave[i].ar_addr ; - assign slave_monitor_dv[i].ar_len = slave[i].ar_len ; - assign slave_monitor_dv[i].ar_size = slave[i].ar_size ; - assign slave_monitor_dv[i].ar_burst = slave[i].ar_burst ; - assign slave_monitor_dv[i].ar_lock = slave[i].ar_lock ; - assign slave_monitor_dv[i].ar_cache = slave[i].ar_cache ; - assign slave_monitor_dv[i].ar_prot = slave[i].ar_prot ; - assign slave_monitor_dv[i].ar_qos = slave[i].ar_qos ; - assign slave_monitor_dv[i].ar_region = slave[i].ar_region; - assign slave_monitor_dv[i].ar_user = slave[i].ar_user ; - assign slave_monitor_dv[i].ar_valid = slave[i].ar_valid ; - assign slave_monitor_dv[i].ar_ready = slave[i].ar_ready ; - assign slave_monitor_dv[i].r_id = slave[i].r_id ; - assign slave_monitor_dv[i].r_data = slave[i].r_data ; - assign slave_monitor_dv[i].r_resp = slave[i].r_resp ; - assign slave_monitor_dv[i].r_last = slave[i].r_last ; - assign slave_monitor_dv[i].r_user = slave[i].r_user ; - assign slave_monitor_dv[i].r_valid = slave[i].r_valid ; 
- assign slave_monitor_dv[i].r_ready = slave[i].r_ready ; - end - for (genvar i = 0; i < TbNumMst; i++) begin : gen_connect_snoop_monitor - assign snoop_monitor_dv[i].ac_valid = snoop[i].ac_valid; - assign snoop_monitor_dv[i].ac_ready = snoop[i].ac_ready; - assign snoop_monitor_dv[i].ac_snoop = snoop[i].ac_snoop; - assign snoop_monitor_dv[i].ac_addr = snoop[i].ac_addr; - assign snoop_monitor_dv[i].ac_prot = snoop[i].ac_prot; - assign snoop_monitor_dv[i].cr_valid = snoop[i].cr_valid; - assign snoop_monitor_dv[i].cr_ready = snoop[i].cr_ready; - assign snoop_monitor_dv[i].cr_resp = snoop[i].cr_resp; - assign snoop_monitor_dv[i].cd_valid = snoop[i].cd_valid; - assign snoop_monitor_dv[i].cd_ready = snoop[i].cd_ready; - assign snoop_monitor_dv[i].cd_data = snoop[i].cd_data; - assign snoop_monitor_dv[i].cd_last = snoop[i].cd_last; - end -endmodule \ No newline at end of file +endmodule diff --git a/test/tb_ace_ccu_top_old.sv b/test/tb_ace_ccu_top_old.sv new file mode 100644 index 0000000..ea2c27d --- /dev/null +++ b/test/tb_ace_ccu_top_old.sv @@ -0,0 +1,536 @@ +// Copyright (c) 2019 ETH Zurich and University of Bologna. +// Copyright (c) 2022 PlanV GmbH +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Florian Zaruba +// - Andreas Kurth + +// Directed Random Verification Testbench for `axi_xbar`: The crossbar is instantiated with +// a number of random axi master and slave modules. 
Each random master executes a fixed number of +// writes and reads over the whole address map. All masters simultaneously issue transactions +// through the crossbar, thereby saturating it. A monitor, which snoops the transactions of each +// master and slave port and models the crossbar with a network of FIFOs, checks whether each +// transaction follows the expected route. + +`include "ace/typedef.svh" +`include "ace/assign.svh" + +module tb_ace_ccu_top #( + parameter bit TbEnAtop = 1'b1, // enable atomic operations (ATOPs) + parameter bit TbEnExcl = 1'b0, // enable exclusive accesses + parameter bit TbUniqueIds = 1'b0, // restrict to only unique IDs + parameter int unsigned TbNumMst = 32'd4, // how many AXI masters there are + parameter int unsigned TbNumSlv = 32'd1 // how many AXI slaves there are +); + // Random master no Transactions + localparam int unsigned NoWrites = 80; // How many writes per master + localparam int unsigned NoReads = 80; // How many reads per master + // timing parameters + localparam time CyclTime = 10ns; + localparam time ApplTime = 2ns; + localparam time TestTime = 8ns; + + // axi configuration + localparam int unsigned AxiIdWidthMasters = 4; + localparam int unsigned AxiIdUsed = 3; // Has to be <= AxiIdWidthMasters + localparam int unsigned AxiIdWidthSlaves = AxiIdWidthMasters + $clog2(TbNumMst)+$clog2(TbNumMst+1); + localparam int unsigned AxiAddrWidth = 32; // Axi Address Width + localparam int unsigned AxiDataWidth = 64; // Axi Data Width + localparam int unsigned AxiStrbWidth = AxiDataWidth / 8; + localparam int unsigned AxiUserWidth = 5; + + // in the bench can change these variables which are set here freely + localparam ace_pkg::ccu_cfg_t ccu_cfg = '{ + NoSlvPorts: TbNumMst, + MaxMstTrans: 10, + MaxSlvTrans: 6, + FallThrough: 1'b1, + LatencyMode: ace_pkg::NO_LATENCY, + AxiIdWidthSlvPorts: AxiIdWidthMasters, + AxiIdUsedSlvPorts: AxiIdUsed, + UniqueIds: TbUniqueIds, + AxiAddrWidth: AxiAddrWidth, + AxiDataWidth: AxiDataWidth + }; + 
+ + typedef logic [AxiIdWidthMasters-1:0] id_mst_t; + typedef logic [AxiIdWidthSlaves-1:0] id_slv_t; + typedef logic [AxiAddrWidth-1:0] addr_t; + typedef logic [AxiDataWidth-1:0] data_t; + typedef logic [AxiStrbWidth-1:0] strb_t; + typedef logic [AxiUserWidth-1:0] user_t; + + `ACE_TYPEDEF_AW_CHAN_T(aw_chan_mst_t, addr_t, id_mst_t, user_t) + `AXI_TYPEDEF_AW_CHAN_T(aw_chan_slv_t, addr_t, id_slv_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_mst_t, id_mst_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_slv_t, id_slv_t, user_t) + + `ACE_TYPEDEF_AR_CHAN_T(ar_chan_mst_t, addr_t, id_mst_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(ar_chan_slv_t, addr_t, id_slv_t, user_t) + `ACE_TYPEDEF_R_CHAN_T(r_chan_mst_t, data_t, id_mst_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(r_chan_slv_t, data_t, id_slv_t, user_t) + + `ACE_TYPEDEF_REQ_T(mst_req_t, aw_chan_mst_t, w_chan_t, ar_chan_mst_t) + `ACE_TYPEDEF_RESP_T(mst_resp_t, b_chan_mst_t, r_chan_mst_t) + `AXI_TYPEDEF_REQ_T(slv_req_t, aw_chan_slv_t, w_chan_t, ar_chan_slv_t) + `AXI_TYPEDEF_RESP_T(slv_resp_t, b_chan_slv_t, r_chan_slv_t) + + `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) + `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) + `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) + `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) + `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) + + + typedef ace_test::ace_rand_master #( + // AXI interface parameters + .AW ( AxiAddrWidth ), + .DW ( AxiDataWidth ), + .IW ( AxiIdWidthMasters ), + .UW ( AxiUserWidth ), + // Stimuli application and test time + .TA ( ApplTime ), + .TT ( TestTime ), + // Maximum number of read and write transactions in flight + .MAX_READ_TXNS ( 20 ), + .MAX_WRITE_TXNS ( 20 ), + .AXI_EXCLS ( TbEnExcl ), + .AXI_ATOPS ( TbEnAtop ), + .UNIQUE_IDS ( TbUniqueIds ) + ) ace_rand_master_t; + typedef axi_test::axi_rand_slave #( + // AXI interface parameters + .AW ( AxiAddrWidth ), + .DW ( AxiDataWidth ), + .IW ( AxiIdWidthSlaves ), + .UW ( AxiUserWidth ), + // 
Stimuli application and test time + .TA ( ApplTime ), + .TT ( TestTime ) + ) axi_rand_slave_t; + + typedef snoop_test::snoop_rand_slave #( + // ADDR and Data interface parameters + .AW ( AxiAddrWidth ), + .DW ( AxiDataWidth ), + // Stimuli application and test time + .TA ( ApplTime), + .TT ( TestTime), + .RAND_RESP ( '0), + // Upper and lower bounds on wait cycles on Ax, W, and resp (R and B) channels + .AC_MIN_WAIT_CYCLES ( 2), + .AC_MAX_WAIT_CYCLES ( 15), + .CR_MIN_WAIT_CYCLES ( 2), + .CR_MAX_WAIT_CYCLES ( 15), + .CD_MIN_WAIT_CYCLES ( 2), + .CD_MAX_WAIT_CYCLES ( 15) + )snoop_rand_slave_t; + // ------------- + // DUT signals + // ------------- + logic clk; + // DUT signals + logic rst_n; + logic [TbNumMst-1:0] end_of_sim; + + // master structs + mst_req_t [TbNumMst-1:0] masters_req; + mst_resp_t [TbNumMst-1:0] masters_resp; + + // slave structs + slv_req_t [TbNumSlv-1:0] slaves_req; + slv_resp_t [TbNumSlv-1:0] slaves_resp; + + // snoop structs + snoop_req_t [TbNumMst-1:0] snoop_req; + snoop_resp_t [TbNumMst-1:0] snoop_resp; + + + // ------------------------------- + // AXI Interfaces + // ------------------------------- + ACE_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) master [TbNumMst-1:0] (); + ACE_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) master_dv [TbNumMst-1:0] (clk); + ACE_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) master_monitor_dv [TbNumMst-1:0] (clk); + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_masters + `ACE_ASSIGN (master[i], master_dv[i]) + `ACE_ASSIGN_TO_REQ(masters_req[i], master[i]) + `ACE_ASSIGN_TO_RESP(masters_resp[i], master[i]) + end + + AXI_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + 
.AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) slave [TbNumSlv-1:0] (); + AXI_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) slave_dv [TbNumSlv-1:0](clk); + AXI_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) slave_monitor_dv [TbNumSlv-1:0](clk); + for (genvar i = 0; i < TbNumSlv; i++) begin : gen_conn_dv_slaves + `AXI_ASSIGN(slave_dv[i], slave[i]) + `AXI_ASSIGN_TO_REQ(slaves_req[i], slave[i]) + `AXI_ASSIGN_TO_RESP(slaves_resp[i], slave[i]) + end + + SNOOP_BUS #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop [TbNumMst-1:0] (); + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop_dv [TbNumMst-1:0](clk); + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop_monitor_dv [TbNumMst-1:0](clk); + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_snoop + `SNOOP_ASSIGN(snoop_dv[i], snoop[i]) + `SNOOP_ASSIGN_TO_REQ(snoop_req[i], snoop[i]) + `SNOOP_ASSIGN_TO_RESP(snoop_resp[i], snoop[i]) + end + + // ------------------------------- + // AXI and SNOOP Rand Masters and Slaves + // ------------------------------- + // Masters control simulation run time + ace_rand_master_t ace_rand_master [TbNumMst]; + for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_master + initial begin + ace_rand_master[i] = new( master_dv[i] ); + end_of_sim[i] <= 1'b0; + ace_rand_master[i].add_memory_region(32'h0000_0000, 32'h0000_3000, + axi_pkg::DEVICE_NONBUFFERABLE); + ace_rand_master[i].reset(); + @(posedge rst_n); + ace_rand_master[i].run(NoReads, NoWrites); + end_of_sim[i] <= 1'b1; + end + end + + snoop_rand_slave_t snoop_rand_slave [TbNumMst]; + for (genvar i = 0; i < 
TbNumMst; i++) begin : gen_rand_snoop + initial begin + snoop_rand_slave[i] = new( snoop_dv[i] ); + snoop_rand_slave[i].reset(); + @(posedge rst_n); + snoop_rand_slave[i].run(); + end + end + + + axi_rand_slave_t axi_rand_slave [1]; + for (genvar i = 0; i < TbNumSlv; i++) begin : gen_rand_slave + initial begin + axi_rand_slave[i] = new( slave_dv[i] ); + axi_rand_slave[i].reset(); + @(posedge rst_n); + axi_rand_slave[i].run(); + end + end + + + + + initial begin : proc_monitor + static tb_ace_ccu_pkg::ace_ccu_monitor #( + .AxiAddrWidth ( AxiAddrWidth ), + .AxiDataWidth ( AxiDataWidth ), + .AxiIdWidthMasters ( AxiIdWidthMasters ), + .AxiIdWidthSlaves ( AxiIdWidthSlaves ), + .AxiUserWidth ( AxiUserWidth ), + .NoMasters ( TbNumMst ), + .NoSlaves ( TbNumSlv ), + .TimeTest ( TestTime ) + ) monitor = new( master_monitor_dv, slave_monitor_dv, snoop_monitor_dv ); + fork + monitor.run(); + do begin + #TestTime; + if(end_of_sim == '1) begin + monitor.print_result(); + $stop(); + end + @(posedge clk); + end while (1'b1); + join + end + + //----------------------------------- + // Clock generator + //----------------------------------- + clk_rst_gen #( + .ClkPeriod ( CyclTime ), + .RstClkCycles ( 5 ) + ) i_clk_gen ( + .clk_o (clk), + .rst_no(rst_n) + ); + + //----------------------------------- + // DUT + //----------------------------------- + ace_ccu_top_intf #( + .AXI_USER_WIDTH ( AxiUserWidth ), + .Cfg ( ccu_cfg ) + ) i_ccu_dut ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .test_i ( 1'b0 ), + .snoop_ports ( snoop ), + .slv_ports ( master ), + .mst_ports ( slave[0] ) + ); + + // logger for master modules + for (genvar i = 0; i < TbNumMst; i++) begin : gen_master_logger + ace_chan_logger #( + .TestTime ( TestTime ), // Time after clock, where sampling happens + .LoggerName( $sformatf("axi_logger_master_%0d", i)), + .aw_chan_t ( aw_chan_mst_t ), // axi AW type + .w_chan_t ( w_chan_t ), // axi W type + .b_chan_t ( b_chan_mst_t ), // axi B type + .ar_chan_t ( ar_chan_mst_t ), // 
axi AR type + .r_chan_t ( r_chan_mst_t ) // axi R type + ) i_mst_channel_logger ( + .clk_i ( clk ), // Clock + .rst_ni ( rst_n ), // Asynchronous reset active low, when `1'b0` no sampling + .end_sim_i ( &end_of_sim ), + // AW channel + .aw_chan_i ( masters_req[i].aw ), + .aw_valid_i ( masters_req[i].aw_valid ), + .aw_ready_i ( masters_resp[i].aw_ready ), + // W channel + .w_chan_i ( masters_req[i].w ), + .w_valid_i ( masters_req[i].w_valid ), + .w_ready_i ( masters_resp[i].w_ready ), + // B channel + .b_chan_i ( masters_resp[i].b ), + .b_valid_i ( masters_resp[i].b_valid ), + .b_ready_i ( masters_req[i].b_ready ), + // AR channel + .ar_chan_i ( masters_req[i].ar ), + .ar_valid_i ( masters_req[i].ar_valid ), + .ar_ready_i ( masters_resp[i].ar_ready ), + // R channel + .r_chan_i ( masters_resp[i].r ), + .r_valid_i ( masters_resp[i].r_valid ), + .r_ready_i ( masters_req[i].r_ready ) + ); + end + // logger for slave modules + for (genvar i = 0; i < 1; i++) begin : gen_slave_logger + axi_chan_logger #( + .TestTime ( TestTime ), // Time after clock, where sampling happens + .LoggerName( $sformatf("axi_logger_slave_%0d",i)), + .aw_chan_t ( aw_chan_slv_t ), // axi AW type + .w_chan_t ( w_chan_t ), // axi W type + .b_chan_t ( b_chan_slv_t ), // axi B type + .ar_chan_t ( ar_chan_slv_t ), // axi AR type + .r_chan_t ( r_chan_slv_t ) // axi R type + ) i_slv_channel_logger ( + .clk_i ( clk ), // Clock + .rst_ni ( rst_n ), // Asynchronous reset active low, when `1'b0` no sampling + .end_sim_i ( &end_of_sim ), + // AW channel + .aw_chan_i ( slaves_req[i].aw ), + .aw_valid_i ( slaves_req[i].aw_valid ), + .aw_ready_i ( slaves_resp[i].aw_ready ), + // W channel + .w_chan_i ( slaves_req[i].w ), + .w_valid_i ( slaves_req[i].w_valid ), + .w_ready_i ( slaves_resp[i].w_ready ), + // B channel + .b_chan_i ( slaves_resp[i].b ), + .b_valid_i ( slaves_resp[i].b_valid ), + .b_ready_i ( slaves_req[i].b_ready ), + // AR channel + .ar_chan_i ( slaves_req[i].ar ), + .ar_valid_i ( 
slaves_req[i].ar_valid ), + .ar_ready_i ( slaves_resp[i].ar_ready ), + // R channel + .r_chan_i ( slaves_resp[i].r ), + .r_valid_i ( slaves_resp[i].r_valid ), + .r_ready_i ( slaves_req[i].r_ready ) + ); + end + +// logger for snoop modules + for (genvar i = 0; i < TbNumMst; i++) begin : gen_snoop_logger + snoop_chan_logger #( + .TestTime ( TestTime ), // Time after clock, where sampling happens + .LoggerName( $sformatf("axi_logger_snoop_%0d",i)), + .ac_chan_t ( snoop_ac_t ), // AC type + .cr_chan_t ( snoop_cr_t ), // CR type + .cd_chan_t ( snoop_cd_t ) // CD type + ) i_snoop_channel_logger ( + .clk_i ( clk ), // Clock + .rst_ni ( rst_n ), // Asynchronous reset active low, when `1'b0` no sampling + .end_sim_i ( &end_of_sim ), + // AC channel + .ac_chan_i ( snoop_req[i].ac ), + .ac_valid_i ( snoop_req[i].ac_valid ), + .ac_ready_i ( snoop_resp[i].ac_ready ), + // CR channel + .cr_chan_i ( snoop_resp[i].cr_resp ), + .cr_valid_i ( snoop_resp[i].cr_valid), + .cr_ready_i ( snoop_req[i].cr_ready ), + // CD channel + .cd_chan_i ( snoop_resp[i].cd ), + .cd_valid_i ( snoop_resp[i].cd_valid), + .cd_ready_i ( snoop_req[i].cd_ready ) + ); + end + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_connect_master_monitor + assign master_monitor_dv[i].aw_id = master[i].aw_id ; + assign master_monitor_dv[i].aw_addr = master[i].aw_addr ; + assign master_monitor_dv[i].aw_len = master[i].aw_len ; + assign master_monitor_dv[i].aw_size = master[i].aw_size ; + assign master_monitor_dv[i].aw_burst = master[i].aw_burst ; + assign master_monitor_dv[i].aw_lock = master[i].aw_lock ; + assign master_monitor_dv[i].aw_cache = master[i].aw_cache ; + assign master_monitor_dv[i].aw_prot = master[i].aw_prot ; + assign master_monitor_dv[i].aw_qos = master[i].aw_qos ; + assign master_monitor_dv[i].aw_region = master[i].aw_region; + assign master_monitor_dv[i].aw_atop = master[i].aw_atop ; + assign master_monitor_dv[i].aw_user = master[i].aw_user ; + assign master_monitor_dv[i].aw_valid = 
master[i].aw_valid ; + assign master_monitor_dv[i].aw_ready = master[i].aw_ready ; + assign master_monitor_dv[i].aw_snoop = master[i].aw_snoop; + assign master_monitor_dv[i].aw_bar = master[i].aw_bar ; + assign master_monitor_dv[i].aw_domain = master[i].aw_domain ; + assign master_monitor_dv[i].aw_awunique = master[i].aw_awunique ; + assign master_monitor_dv[i].w_data = master[i].w_data ; + assign master_monitor_dv[i].w_strb = master[i].w_strb ; + assign master_monitor_dv[i].w_last = master[i].w_last ; + assign master_monitor_dv[i].w_user = master[i].w_user ; + assign master_monitor_dv[i].w_valid = master[i].w_valid ; + assign master_monitor_dv[i].w_ready = master[i].w_ready ; + assign master_monitor_dv[i].b_id = master[i].b_id ; + assign master_monitor_dv[i].b_resp = master[i].b_resp ; + assign master_monitor_dv[i].b_user = master[i].b_user ; + assign master_monitor_dv[i].b_valid = master[i].b_valid ; + assign master_monitor_dv[i].b_ready = master[i].b_ready ; + assign master_monitor_dv[i].ar_id = master[i].ar_id ; + assign master_monitor_dv[i].ar_addr = master[i].ar_addr ; + assign master_monitor_dv[i].ar_len = master[i].ar_len ; + assign master_monitor_dv[i].ar_size = master[i].ar_size ; + assign master_monitor_dv[i].ar_burst = master[i].ar_burst ; + assign master_monitor_dv[i].ar_lock = master[i].ar_lock ; + assign master_monitor_dv[i].ar_cache = master[i].ar_cache ; + assign master_monitor_dv[i].ar_prot = master[i].ar_prot ; + assign master_monitor_dv[i].ar_qos = master[i].ar_qos ; + assign master_monitor_dv[i].ar_region = master[i].ar_region; + assign master_monitor_dv[i].ar_user = master[i].ar_user ; + assign master_monitor_dv[i].ar_valid = master[i].ar_valid ; + assign master_monitor_dv[i].ar_ready = master[i].ar_ready ; + assign master_monitor_dv[i].ar_snoop = master[i].ar_snoop ; + assign master_monitor_dv[i].ar_bar = master[i].ar_bar ; + assign master_monitor_dv[i].ar_domain = master[i].ar_domain ; + assign master_monitor_dv[i].r_id = master[i].r_id ; + 
assign master_monitor_dv[i].r_data = master[i].r_data ; + assign master_monitor_dv[i].r_resp = master[i].r_resp ; + assign master_monitor_dv[i].r_last = master[i].r_last ; + assign master_monitor_dv[i].r_user = master[i].r_user ; + assign master_monitor_dv[i].r_valid = master[i].r_valid ; + assign master_monitor_dv[i].r_ready = master[i].r_ready ; + end + for (genvar i = 0; i < TbNumSlv; i++) begin : gen_connect_slave_monitor + assign slave_monitor_dv[i].aw_id = slave[i].aw_id ; + assign slave_monitor_dv[i].aw_addr = slave[i].aw_addr ; + assign slave_monitor_dv[i].aw_len = slave[i].aw_len ; + assign slave_monitor_dv[i].aw_size = slave[i].aw_size ; + assign slave_monitor_dv[i].aw_burst = slave[i].aw_burst ; + assign slave_monitor_dv[i].aw_lock = slave[i].aw_lock ; + assign slave_monitor_dv[i].aw_cache = slave[i].aw_cache ; + assign slave_monitor_dv[i].aw_prot = slave[i].aw_prot ; + assign slave_monitor_dv[i].aw_qos = slave[i].aw_qos ; + assign slave_monitor_dv[i].aw_region = slave[i].aw_region; + assign slave_monitor_dv[i].aw_atop = slave[i].aw_atop ; + assign slave_monitor_dv[i].aw_user = slave[i].aw_user ; + assign slave_monitor_dv[i].aw_valid = slave[i].aw_valid ; + assign slave_monitor_dv[i].aw_ready = slave[i].aw_ready ; + assign slave_monitor_dv[i].w_data = slave[i].w_data ; + assign slave_monitor_dv[i].w_strb = slave[i].w_strb ; + assign slave_monitor_dv[i].w_last = slave[i].w_last ; + assign slave_monitor_dv[i].w_user = slave[i].w_user ; + assign slave_monitor_dv[i].w_valid = slave[i].w_valid ; + assign slave_monitor_dv[i].w_ready = slave[i].w_ready ; + assign slave_monitor_dv[i].b_id = slave[i].b_id ; + assign slave_monitor_dv[i].b_resp = slave[i].b_resp ; + assign slave_monitor_dv[i].b_user = slave[i].b_user ; + assign slave_monitor_dv[i].b_valid = slave[i].b_valid ; + assign slave_monitor_dv[i].b_ready = slave[i].b_ready ; + assign slave_monitor_dv[i].ar_id = slave[i].ar_id ; + assign slave_monitor_dv[i].ar_addr = slave[i].ar_addr ; + assign 
slave_monitor_dv[i].ar_len = slave[i].ar_len ; + assign slave_monitor_dv[i].ar_size = slave[i].ar_size ; + assign slave_monitor_dv[i].ar_burst = slave[i].ar_burst ; + assign slave_monitor_dv[i].ar_lock = slave[i].ar_lock ; + assign slave_monitor_dv[i].ar_cache = slave[i].ar_cache ; + assign slave_monitor_dv[i].ar_prot = slave[i].ar_prot ; + assign slave_monitor_dv[i].ar_qos = slave[i].ar_qos ; + assign slave_monitor_dv[i].ar_region = slave[i].ar_region; + assign slave_monitor_dv[i].ar_user = slave[i].ar_user ; + assign slave_monitor_dv[i].ar_valid = slave[i].ar_valid ; + assign slave_monitor_dv[i].ar_ready = slave[i].ar_ready ; + assign slave_monitor_dv[i].r_id = slave[i].r_id ; + assign slave_monitor_dv[i].r_data = slave[i].r_data ; + assign slave_monitor_dv[i].r_resp = slave[i].r_resp ; + assign slave_monitor_dv[i].r_last = slave[i].r_last ; + assign slave_monitor_dv[i].r_user = slave[i].r_user ; + assign slave_monitor_dv[i].r_valid = slave[i].r_valid ; + assign slave_monitor_dv[i].r_ready = slave[i].r_ready ; + end + for (genvar i = 0; i < TbNumMst; i++) begin : gen_connect_snoop_monitor + assign snoop_monitor_dv[i].ac_valid = snoop[i].ac_valid; + assign snoop_monitor_dv[i].ac_ready = snoop[i].ac_ready; + assign snoop_monitor_dv[i].ac_snoop = snoop[i].ac_snoop; + assign snoop_monitor_dv[i].ac_addr = snoop[i].ac_addr; + assign snoop_monitor_dv[i].ac_prot = snoop[i].ac_prot; + assign snoop_monitor_dv[i].cr_valid = snoop[i].cr_valid; + assign snoop_monitor_dv[i].cr_ready = snoop[i].cr_ready; + assign snoop_monitor_dv[i].cr_resp = snoop[i].cr_resp; + assign snoop_monitor_dv[i].cd_valid = snoop[i].cd_valid; + assign snoop_monitor_dv[i].cd_ready = snoop[i].cd_ready; + assign snoop_monitor_dv[i].cd_data = snoop[i].cd_data; + assign snoop_monitor_dv[i].cd_last = snoop[i].cd_last; + end +endmodule \ No newline at end of file diff --git a/test/tb_ccu_ctrl_r_snoop.sv b/test/tb_ccu_ctrl_r_snoop.sv new file mode 100644 index 0000000..e5481d4 --- /dev/null +++ 
b/test/tb_ccu_ctrl_r_snoop.sv @@ -0,0 +1,268 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +`include "ace/typedef.svh" +`include "ace/assign.svh" +// Testbench for ccu_ctrl_r_snoop: cache_top_agent masters drive the ACE/snoop interfaces, master 0 feeds the DUT, and the DUT's memory-side port is served by an axi_sim_mem preloaded from MemDir/main_mem.mem +module tb_ccu_ctrl_r_snoop #( + parameter int unsigned AddrWidth = 0, + parameter int unsigned DataWidth = 0, + parameter int unsigned WordWidth = 0, + parameter int unsigned CachelineWords = 0, + parameter int unsigned Ways = 0, + parameter int unsigned Sets = 0, + parameter int unsigned TbNumMst = 0, + parameter string MemDir = "" +); + // Random ace_intf no Transactions + localparam int unsigned NoWrites = 80; // How many writes per ace_intf + localparam int unsigned NoReads = 0; // How many reads per ace_intf + // timing parameters + localparam time CyclTime = 10ns; + localparam time ApplTime = 2ns; + localparam time TestTime = 8ns; + + // axi configuration + localparam int unsigned AxiIdWidthMasters = 4; + localparam int unsigned AxiIdUsed = 3; + localparam int unsigned AxiIdWidthSlaves = AxiIdWidthMasters + $clog2(TbNumMst)+$clog2(TbNumMst+1); + localparam int unsigned AxiAddrWidth = AddrWidth; + localparam int unsigned AxiDataWidth = DataWidth; + localparam int unsigned AxiStrbWidth = AxiDataWidth / 8; + localparam int unsigned AxiUserWidth = 5; + localparam int unsigned WriteBackLen = CachelineWords - 1; + localparam int unsigned WriteBackSize = $clog2(DataWidth / 8); + + typedef logic
[AxiIdWidthMasters-1:0] id_t; + typedef logic [AxiIdWidthSlaves-1:0] id_slv_t; + typedef logic [AxiAddrWidth-1:0] addr_t; + typedef logic [AxiDataWidth-1:0] data_t; + typedef logic [AxiStrbWidth-1:0] strb_t; + typedef logic [AxiUserWidth-1:0] user_t; + + `ACE_TYPEDEF_AW_CHAN_T(slave_aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_AW_CHAN_T(master_aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(slave_w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(slave_b_chan_t, id_t, user_t) + `ACE_TYPEDEF_AR_CHAN_T(slave_ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(master_ar_chan_t, addr_t, id_t, user_t) + `ACE_TYPEDEF_R_CHAN_T(slave_r_chan_t, data_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(master_r_chan_t, data_t, id_t, user_t) + `ACE_TYPEDEF_REQ_T(slv_req_t, slave_aw_chan_t, slave_w_chan_t, slave_ar_chan_t) + `AXI_TYPEDEF_REQ_T(mst_req_t, master_aw_chan_t, slave_w_chan_t, master_ar_chan_t) + `ACE_TYPEDEF_RESP_T(slv_resp_t, slave_b_chan_t, slave_r_chan_t) + `AXI_TYPEDEF_RESP_T(mst_resp_t, slave_b_chan_t, master_r_chan_t) + `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) + `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) + `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) + `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) + `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) + + logic clk, rst_n; + + string data_mem_file_template = {MemDir, "/data_mem_%0d.mem"}; + string tag_mem_file_template = {MemDir, "/tag_mem_%0d.mem"}; + string status_file_template = {MemDir, "/state_%0d.mem"}; + string txn_file_template = {MemDir, "/txns_%0d.txt"}; + // The bus user width follows AxiUserWidth so it matches user_t and the agent's UW parameter + ACE_BUS_DV #( + .AXI_ADDR_WIDTH (AxiAddrWidth), + .AXI_DATA_WIDTH (AxiDataWidth), + .AXI_ID_WIDTH (AxiIdWidthMasters), + .AXI_USER_WIDTH (AxiUserWidth) + ) ace_intf [TbNumMst] (clk); + + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH (AxiAddrWidth), + .SNOOP_DATA_WIDTH (AxiDataWidth) + ) snoop_intf [TbNumMst](clk); + + CLK_IF clk_if (clk); + // Virtual interface types handed to the agents; parameters must mirror the instances above + typedef virtual ACE_BUS_DV #( + .AXI_ADDR_WIDTH (AxiAddrWidth), + .AXI_DATA_WIDTH
(AxiDataWidth), + .AXI_ID_WIDTH (AxiIdWidthMasters), + .AXI_USER_WIDTH (AxiUserWidth) + ) ace_bus_v_t; + + typedef virtual SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH (AxiAddrWidth), + .SNOOP_DATA_WIDTH (AxiDataWidth) + ) snoop_bus_v_t; + + typedef virtual CLK_IF clk_if_v_t; + + // Connections: + // cache_top_agent -> ACE -> DUT -> ACE -> AXI -> axi_sim_mem + // DUT outputs ACE, but it connects to an AXI interface + // This is fine because each subfield is connected separately + // ace.aw = axi.aw would not work because the structs have different widths + + //----------------------------------- + // Clock generator + //----------------------------------- + clk_rst_gen #( + .ClkPeriod ( CyclTime ), + .RstClkCycles ( 5 ) + ) i_clk_gen ( + .clk_o (clk), + .rst_no (rst_n) + ); + + + cache_test_pkg::cache_top_agent #( + .AW(AxiAddrWidth), + .DW(AxiDataWidth), + .AC_AW(AxiAddrWidth), + .CD_DW(AxiDataWidth), + .IW(AxiIdWidthMasters), + .UW(AxiUserWidth), + .TA(ApplTime), + .TT(TestTime), + .CACHELINE_WORDS(CachelineWords), + .WORD_WIDTH(WordWidth), + .WAYS(Ways), + .SETS(Sets), + .ace_bus_t(ace_bus_v_t), + .snoop_bus_t(snoop_bus_v_t), + .clk_if_t(clk_if_v_t) + ) ace_master [TbNumMst]; + + slv_req_t [TbNumMst] masters_req; + slv_resp_t [TbNumMst] masters_resp; + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_masters + `ACE_ASSIGN_TO_REQ(masters_req[i], ace_intf[i]) + `ACE_ASSIGN_FROM_RESP(ace_intf[i], masters_resp[i]) + end + + AXI_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) axi_intf (clk); + + slv_req_t slaves_req; + slv_resp_t slaves_resp; + + mst_req_t main_mem_req; + mst_resp_t main_mem_resp; + + `AXI_ASSIGN_FROM_REQ(axi_intf, slaves_req) + `AXI_ASSIGN_TO_RESP(slaves_resp, axi_intf) + + `AXI_ASSIGN_TO_REQ(main_mem_req, axi_intf) + `AXI_ASSIGN_FROM_RESP(axi_intf, main_mem_resp) + + snoop_req_t [TbNumMst] snoop_req; + snoop_resp_t [TbNumMst]
snoop_resp; + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_snoop + `SNOOP_ASSIGN_FROM_REQ(snoop_intf[i], snoop_req[i]) + `SNOOP_ASSIGN_TO_RESP(snoop_resp[i], snoop_intf[i]) + end + // One agent per master: build it from the per-master file names, reset it, then run once reset is released + for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_master + initial begin + string data_mem_file, tag_mem_file, status_file, txn_file; + $sformat(data_mem_file, data_mem_file_template, i); + $sformat(tag_mem_file, tag_mem_file_template, i); + $sformat(status_file, status_file_template, i); + $sformat(txn_file, txn_file_template, i); + ace_master[i] = new( + ace_intf[i], + snoop_intf[i], + clk_if, + data_mem_file, + tag_mem_file, + status_file, + txn_file + ); + ace_master[i].reset(); + @(posedge rst_n); + ace_master[i].run(); + end + end + + axi_sim_mem #( + // AXI interface parameters + .AddrWidth ( AxiAddrWidth ), + .DataWidth ( AxiDataWidth ), + .IdWidth ( AxiIdWidthSlaves ), + .UserWidth ( AxiUserWidth ), + .NumPorts (1), + .axi_req_t(mst_req_t), + .axi_rsp_t(mst_resp_t), + .ApplDelay ( ApplTime ), + .AcqDelay (TestTime ) + ) axi_mem ( + .clk_i(clk), + .rst_ni(rst_n), + .axi_req_i(main_mem_req), + .axi_rsp_o(main_mem_resp), + .mon_w_valid_o(), + .mon_w_addr_o(), + .mon_w_data_o(), + .mon_w_id_o(), + .mon_w_user_o(), + .mon_w_beat_count_o(), + .mon_w_last_o(), + .mon_r_valid_o(), + .mon_r_addr_o(), + .mon_r_data_o(), + .mon_r_id_o(), + .mon_r_user_o(), + .mon_r_beat_count_o(), + .mon_r_last_o() + ); + + initial begin + $readmemh({MemDir, "/main_mem.mem"}, axi_mem.mem); + end + + ace_pkg::snoop_info_t snoopy_trs; + logic illegal; // decoder's illegal-transaction flag, previously an implicit net + // DUT + // NOTE(review): the instance below is named aw_trs_decoder although it decodes the AR channel + ace_ar_transaction_decoder #( + .ar_chan_t(slave_ar_chan_t) + ) aw_trs_decoder ( + .ar_i(slaves_req.ar), // NOTE(review): decodes the DUT's output AR; tb_ccu_ctrl_wr_snoop decodes masters_req[0] instead - confirm + .snoop_info_o(snoopy_trs), + .illegal_trs_o(illegal) + ); + + ccu_ctrl_r_snoop #( + .slv_req_t(slv_req_t), + .slv_resp_t(slv_resp_t), + .mst_req_t(slv_req_t), + .mst_resp_t(slv_resp_t), + .slv_ar_chan_t(slave_ar_chan_t), + .mst_snoop_req_t(snoop_req_t), + .mst_snoop_resp_t(snoop_resp_t), + .AXLEN(WriteBackLen), + .AXSIZE(WriteBackSize)
+ ) DUT ( + .clk_i(clk), + .rst_ni(rst_n), + .snoop_info_i(snoopy_trs), + .slv_req_i(masters_req[0]), + .slv_resp_o(masters_resp[0]), + .mst_req_o(slaves_req), + .mst_resp_i(slaves_resp), + .snoop_resp_i(snoop_resp), + .snoop_req_o(snoop_req), + .ardomain_o() + ); + +endmodule diff --git a/test/tb_ccu_ctrl_wr_snoop.sv b/test/tb_ccu_ctrl_wr_snoop.sv new file mode 100644 index 0000000..bcfc833 --- /dev/null +++ b/test/tb_ccu_ctrl_wr_snoop.sv @@ -0,0 +1,236 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+ +`include "ace/typedef.svh" +`include "ace/assign.svh" +// Testbench for ccu_ctrl_wr_snoop: cache_top_agent masters drive the ACE/snoop interfaces, master 0 feeds the DUT, and the DUT's memory-side port is served by an axi_sim_mem +module tb_ccu_ctrl_wr_snoop #( + parameter int unsigned TbNumMst = 32'd1 // how many AXI masters there are +); + // Random ace_intf no Transactions + localparam int unsigned NoWrites = 80; // How many writes per ace_intf + localparam int unsigned NoReads = 0; // How many reads per ace_intf + // timing parameters + localparam time CyclTime = 10ns; + localparam time ApplTime = 2ns; + localparam time TestTime = 8ns; + + // axi configuration + localparam int unsigned AxiIdWidthMasters = 4; + localparam int unsigned AxiIdUsed = 3; + localparam int unsigned AxiIdWidthSlaves = AxiIdWidthMasters + $clog2(TbNumMst)+$clog2(TbNumMst+1); + localparam int unsigned AxiAddrWidth = 32; + localparam int unsigned AxiDataWidth = 64; + localparam int unsigned AxiStrbWidth = AxiDataWidth / 8; + localparam int unsigned AxiUserWidth = 5; + + typedef logic [AxiIdWidthMasters-1:0] id_t; + typedef logic [AxiIdWidthSlaves-1:0] id_slv_t; + typedef logic [AxiAddrWidth-1:0] addr_t; + typedef logic [AxiDataWidth-1:0] data_t; + typedef logic [AxiStrbWidth-1:0] strb_t; + typedef logic [AxiUserWidth-1:0] user_t; + + `ACE_TYPEDEF_AW_CHAN_T(slave_aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_AW_CHAN_T(master_aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(slave_w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(slave_b_chan_t, id_t, user_t) + `ACE_TYPEDEF_AR_CHAN_T(slave_ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(master_ar_chan_t, addr_t, id_t, user_t) + `ACE_TYPEDEF_R_CHAN_T(slave_r_chan_t, data_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(master_r_chan_t, data_t, id_t, user_t) + `ACE_TYPEDEF_REQ_T(slv_req_t, slave_aw_chan_t, slave_w_chan_t, slave_ar_chan_t) + `AXI_TYPEDEF_REQ_T(mst_req_t, master_aw_chan_t, slave_w_chan_t, master_ar_chan_t) + `ACE_TYPEDEF_RESP_T(slv_resp_t, slave_b_chan_t, slave_r_chan_t) + `AXI_TYPEDEF_RESP_T(mst_resp_t, slave_b_chan_t, master_r_chan_t) + `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t,
addr_t) + `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) + `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) + `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) + `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) + + logic clk, rst_n; + // The bus user width follows AxiUserWidth so it matches user_t and the agent's UW parameter + ACE_BUS_DV #( + .AXI_ADDR_WIDTH (AxiAddrWidth), + .AXI_DATA_WIDTH (AxiDataWidth), + .AXI_ID_WIDTH (AxiIdWidthMasters), + .AXI_USER_WIDTH (AxiUserWidth) + ) ace_intf [TbNumMst] (clk); + + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH (AxiAddrWidth), + .SNOOP_DATA_WIDTH (AxiDataWidth) + ) snoop_intf [TbNumMst](clk); + + CLK_IF clk_if (clk); + // Virtual interface types handed to the agents; parameters must mirror the instances above + typedef virtual ACE_BUS_DV #( + .AXI_ADDR_WIDTH (AxiAddrWidth), + .AXI_DATA_WIDTH (AxiDataWidth), + .AXI_ID_WIDTH (AxiIdWidthMasters), + .AXI_USER_WIDTH (AxiUserWidth) + ) ace_bus_v_t; + + typedef virtual SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH (AxiAddrWidth), + .SNOOP_DATA_WIDTH (AxiDataWidth) + ) snoop_bus_v_t; + + typedef virtual CLK_IF clk_if_v_t; + + // Connections: + // cache_top_agent -> ACE -> DUT -> ACE -> AXI -> axi_sim_mem + // DUT outputs ACE, but it connects to an AXI interface + // This is fine because each subfield is connected separately + // ace.aw = axi.aw would not work because the structs have different widths + + //----------------------------------- + // Clock generator + //----------------------------------- + clk_rst_gen #( + .ClkPeriod ( CyclTime ), + .RstClkCycles ( 5 ) + ) i_clk_gen ( + .clk_o (clk), + .rst_no (rst_n) + ); + + cache_test_pkg::cache_top_agent #( + .AW(AxiAddrWidth), + .DW(AxiDataWidth), + .AC_AW(AxiAddrWidth), + .CD_DW(AxiDataWidth), + .IW(AxiIdWidthMasters), + .UW(AxiUserWidth), + .TA(ApplTime), + .TT(TestTime), + .ace_bus_t(ace_bus_v_t), + .snoop_bus_t(snoop_bus_v_t), + .clk_if_t(clk_if_v_t), + .mem_file("/scratch2/akorsman/ace/src/test/cache/memory_m0.csv") + ) ace_master [TbNumMst]; + + slv_req_t [TbNumMst] masters_req; + slv_resp_t [TbNumMst] masters_resp; + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_masters +
`ACE_ASSIGN_TO_REQ(masters_req[i], ace_intf[i]) + `ACE_ASSIGN_FROM_RESP(ace_intf[i], masters_resp[i]) + end + + AXI_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) axi_intf (clk); + + slv_req_t slaves_req; + slv_resp_t slaves_resp; + + mst_req_t main_mem_req; + mst_resp_t main_mem_resp; + + `AXI_ASSIGN_FROM_REQ(axi_intf, slaves_req) + `AXI_ASSIGN_TO_RESP(slaves_resp, axi_intf) + + `AXI_ASSIGN_TO_REQ(main_mem_req, axi_intf) + `AXI_ASSIGN_FROM_RESP(axi_intf, main_mem_resp) + + snoop_req_t [TbNumMst] snoop_req; + snoop_resp_t [TbNumMst] snoop_resp; + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_snoop + `SNOOP_ASSIGN_FROM_REQ(snoop_intf[i], snoop_req[i]) + `SNOOP_ASSIGN_TO_RESP(snoop_resp[i], snoop_intf[i]) + end + // One agent per master: construct, reset, then run once reset is released + for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_master + initial begin + ace_master[i] = new(ace_intf[i], snoop_intf[i], clk_if); + ace_master[i].reset(); + @(posedge rst_n); + ace_master[i].run(); + end + end + + axi_sim_mem #( + // AXI interface parameters + .AddrWidth ( AxiAddrWidth ), + .DataWidth ( AxiDataWidth ), + .IdWidth ( AxiIdWidthSlaves ), + .UserWidth ( AxiUserWidth ), + .NumPorts (1), + .axi_req_t(mst_req_t), + .axi_rsp_t(mst_resp_t), + .ApplDelay ( ApplTime ), + .AcqDelay (TestTime ) + ) axi_mem ( + .clk_i(clk), + .rst_ni(rst_n), + .axi_req_i(main_mem_req), + .axi_rsp_o(main_mem_resp), + .mon_w_valid_o(), + .mon_w_addr_o(), + .mon_w_data_o(), + .mon_w_id_o(), + .mon_w_user_o(), + .mon_w_beat_count_o(), + .mon_w_last_o(), + .mon_r_valid_o(), + .mon_r_addr_o(), + .mon_r_data_o(), + .mon_r_id_o(), + .mon_r_user_o(), + .mon_r_beat_count_o(), + .mon_r_last_o() + ); + + initial begin + $readmemb("/scratch2/akorsman/ace/scripts/python/main_mem.mem", axi_mem.mem); // NOTE(review): absolute, user-specific path + end + + ace_pkg::acsnoop_t snoopy_trs; + logic snoop_trs, illegal; // decoder status outputs, previously implicit nets + // DUT + + ace_aw_transaction_decoder #( + .aw_chan_t(slave_aw_chan_t) + ) aw_trs_decoder ( +
.aw_i(masters_req[0].aw), + .acsnoop_o(snoopy_trs), + .snoop_trs_o(snoop_trs), + .illegal_trs_o(illegal) + ); + + ccu_ctrl_wr_snoop #( + .slv_req_t(slv_req_t), + .slv_resp_t(slv_resp_t), + .mst_req_t(slv_req_t), + .mst_resp_t(slv_resp_t), + .slv_aw_chan_t(slave_aw_chan_t), + .mst_snoop_req_t(snoop_req_t), + .mst_snoop_resp_t(snoop_resp_t) + ) DUT ( + .clk_i(clk), + .rst_ni(rst_n), + .snoop_trs_i(snoopy_trs), + .slv_req_i(masters_req[0]), + .slv_resp_o(masters_resp[0]), + .mst_req_o(slaves_req), + .mst_resp_i(slaves_resp), + .snoop_resp_i(snoop_resp), + .snoop_req_o(snoop_req), + .awdomain_o() + ); + +endmodule diff --git a/test/tb_ccu_ctrl_wr_snoop_old.sv b/test/tb_ccu_ctrl_wr_snoop_old.sv new file mode 100644 index 0000000..5b79aea --- /dev/null +++ b/test/tb_ccu_ctrl_wr_snoop_old.sv @@ -0,0 +1,231 @@ +`include "ace/typedef.svh" +`include "ace/assign.svh" + +module tb_ccu_ctrl_wr_snoop #( +); + + + localparam int unsigned NoWrites = 8000; // How many writes per master + localparam int unsigned NoReads = 0; // How many reads per master + + // axi configuration + localparam int unsigned AxiIdWidthMasters = 1; + localparam int unsigned AxiIdUsed = 1; // Has to be <= AxiIdWidthMasters + localparam int unsigned AxiIdWidthSlaves = 1; + localparam int unsigned AxiAddrWidth = 32; // Axi Address Width + localparam int unsigned AxiDataWidth = 64; // Axi Data Width + localparam int unsigned AxiStrbWidth = AxiDataWidth / 8; + localparam int unsigned AxiUserWidth = 5; + + // Address space for memory which is initialized + localparam int mem_addr_space = 8; + + localparam time CyclTime = 10ns; + localparam time ApplTime = 2ns; + localparam time TestTime = 8ns; + + // in the bench can change this variables which are set here freely + localparam ccu_pkg::ccu_cfg_t ccu_cfg = '{ + NoSlvPorts: 1, + MaxMstTrans: 10, + MaxSlvTrans: 6, + FallThrough: 1'b1, + LatencyMode: ccu_pkg::NO_LATENCY, + AxiIdWidthSlvPorts: AxiIdWidthMasters, + AxiIdUsedSlvPorts: AxiIdUsed, + UniqueIds: 1, +
AxiAddrWidth: AxiAddrWidth, + AxiDataWidth: AxiDataWidth + }; + // Clock, reset and end-of-simulation flag (end_of_sim is passed to the snoop logger) + logic clk, rst_n; + logic end_of_sim; + + typedef logic [AxiAddrWidth-1:0] addr_t; + typedef logic [AxiIdWidthMasters-1:0] id_t; + typedef logic [AxiUserWidth-1:0] user_t; + typedef logic [AxiDataWidth-1:0] data_t; + typedef logic [AxiDataWidth/8 -1:0] strb_t; + + `ACE_TYPEDEF_AW_CHAN_T(slave_aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(slave_w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(slave_b_chan_t, id_t, user_t) + `ACE_TYPEDEF_AR_CHAN_T(slave_ar_chan_t, addr_t, id_t, user_t) + `ACE_TYPEDEF_R_CHAN_T(slave_r_chan_t, data_t, id_t, user_t) + `ACE_TYPEDEF_REQ_T(mst_req_t, slave_aw_chan_t, slave_w_chan_t, slave_ar_chan_t) + `ACE_TYPEDEF_REQ_T(slv_req_t, slave_aw_chan_t, slave_w_chan_t, slave_ar_chan_t) + `ACE_TYPEDEF_RESP_T(mst_resp_t, slave_b_chan_t, slave_r_chan_t) + `ACE_TYPEDEF_RESP_T(slv_resp_t, slave_b_chan_t, slave_r_chan_t) + `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) + `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) + `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) + `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) + `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) + + //----------------------------------- + // Clock generator + //----------------------------------- + clk_rst_gen #( + .ClkPeriod ( CyclTime ), + .RstClkCycles ( 5 ) + ) i_clk_gen ( + .clk_o (clk), + .rst_no (rst_n) + ); + // ACE master side: master and master_dv are tied together with ACE_ASSIGN and mapped to masters_req / masters_resp + ACE_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) master (); + ACE_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) master_dv (clk); + + mst_req_t masters_req; + mst_resp_t masters_resp; + + `ACE_ASSIGN (master, master_dv) + `ACE_ASSIGN_TO_REQ(masters_req, master) + `ACE_ASSIGN_FROM_RESP(master, masters_resp) + // Memory side: slave and slave_dv are tied together with AXI_ASSIGN and mapped to slaves_req / slaves_resp + AXI_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), +
.AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) slave (); + AXI_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) slave_dv(clk); + + slv_req_t slaves_req; + slv_resp_t slaves_resp; + + `AXI_ASSIGN(slave_dv, slave) + `AXI_ASSIGN_FROM_REQ(slave, slaves_req) + `AXI_ASSIGN_TO_RESP(slaves_resp, slave) + // Snoop side: snoop and snoop_dv are tied together with SNOOP_ASSIGN and mapped to snoop_req / snoop_resp + SNOOP_BUS #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop (); + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop_dv (clk); + + snoop_req_t snoop_req; + snoop_resp_t snoop_resp; + + `SNOOP_ASSIGN(snoop_dv, snoop) + `SNOOP_ASSIGN_FROM_REQ(snoop, snoop_req) + `SNOOP_ASSIGN_TO_RESP(snoop_resp, snoop) + + // Handles for the random ACE master and the random AXI slave; both are constructed in initial blocks + ace_sim_master::ace_rand_master #( + .AW (AxiAddrWidth), + .DW (AxiDataWidth), + .IW (AxiIdWidthMasters), + .UW (AxiUserWidth), + .MAX_READ_TXNS (20), + .MAX_WRITE_TXNS (20), + .UNIQUE_IDS (1), + .TA ( ApplTime ), + .TT (TestTime ), + .CACHELINE_WIDTH (32), + .MEM_ADDR_SPACE (mem_addr_space) + ) ace_master; + + axi_test::axi_rand_slave #( + // AXI interface parameters + .AW ( AxiAddrWidth ), + .DW ( AxiDataWidth ), + .IW ( AxiIdWidthSlaves ), + .UW ( AxiUserWidth ), + .TA ( ApplTime ), + .TT (TestTime ) + ) axi_rand_slave; + // Logger attached to the AC/CR/CD handshakes of the snoop request/response structs + snoop_chan_logger #( + .TestTime (TestTime), + .LoggerName ( "snoop_logger" ), + .ac_chan_t (snoop_ac_t), + .cr_chan_t (snoop_cr_t), + .cd_chan_t (snoop_cd_t) + ) snoop_chan_logger ( + .clk_i (clk), + .rst_ni (rst_n), + .end_sim_i (end_of_sim), + .ac_chan_i (snoop_req.ac), + .ac_valid_i (snoop_req.ac_valid), + .ac_ready_i (snoop_resp.ac_ready), + .cr_chan_i (snoop_resp.cr_resp), + .cr_valid_i (snoop_resp.cr_valid), + .cr_ready_i (snoop_req.cr_ready), + .cd_chan_i (snoop_resp.cd), + .cd_valid_i (snoop_resp.cd_valid), + .cd_ready_i ( snoop_req.cd_ready) + ); + // Main stimulus: run NoReads/NoWrites transactions, then raise end_of_sim and finish + initial begin + ace_master =
new(master_dv, snoop_dv); + end_of_sim <= 1'b0; + ace_master.add_memory_region( + 32'h0000_0000, 32'h0000_3000, + axi_pkg::DEVICE_NONBUFFERABLE); + ace_master.init_cache_memory(); + ace_master.reset(); + @(posedge rst_n); + ace_master.run(NoReads, NoWrites); + end_of_sim <= 1'b1; + $finish; + end + // Bring up the random AXI slave once reset is released + initial begin + axi_rand_slave = new(slave_dv); + axi_rand_slave.reset(); + @(posedge rst_n); + axi_rand_slave.run(); + end + + ace_pkg::acsnoop_t snoopy_trs; + logic snoop_trs, illegal; + // The decoder's acsnoop output is what drives the DUT's snoop_trs_i port + ace_aw_transaction_decoder #( + .aw_chan_t(slave_aw_chan_t) + ) aw_trs_decoder ( + .aw_i(slaves_req.aw), + .acsnoop_o(snoopy_trs), + .snoop_trs_o(snoop_trs), + .illegal_trs_o(illegal) + ); + + ccu_ctrl_wr_snoop #( + .slv_req_t(slv_req_t), + .slv_resp_t(slv_resp_t), + .mst_req_t(mst_req_t), + .mst_resp_t(mst_resp_t), + .slv_aw_chan_t(slave_aw_chan_t), + .mst_snoop_req_t(snoop_req_t), + .mst_snoop_resp_t(snoop_resp_t) + ) DUT ( + .clk_i(clk), + .rst_ni(rst_n), + .snoop_trs_i(snoopy_trs), + .slv_req_i(masters_req), + .slv_resp_o(masters_resp), + .mst_req_o(slaves_req), + .mst_resp_i(slaves_resp), + .snoop_resp_i(snoop_resp), + .snoop_req_o(snoop_req) + ); + +endmodule \ No newline at end of file diff --git a/test/vip/Python_README.md b/test/vip/Python_README.md new file mode 100644 index 0000000..ff0ab52 --- /dev/null +++ b/test/vip/Python_README.md @@ -0,0 +1,42 @@ +# Coherency framework + +The purpose of the Python framework is the following: +- Generate a coherent, randomized initial state for caches and main memory +- Generate randomized cache transaction +- Reconstruct the state of caches and main memory after simulation +- Run a coherency check + +The file `cache_coherency_test.py` contains a command line interface for generating the initial states and running the coherency check. The CLI is replicated in the Makefile of this repository. See the README of this repository for instructions how to run it. + +## Components + +The main class is `CacheCoherencyTest`.
The example tests included, `RandomTest` and `ConflictTest`, extend this class. It provides functions to either generate a randomized initial state and transactions, or define them manually. It also contains methods for running the coherency check. + +`CacheCoherencyTest` includes the following key components: +- `caches` - a list of `CacheState` elements +- `mem_state` - a `MemoryState` element +- `transactions` - a list of `CacheTransactionSequence` elements +- `mem_ranges` - a list of `MemoryRange` elements + +## Principle + +The operating principle of the Python flow is as follows: +1. Generate the initial state +1. Run a coherency check on the initial state +1. Generate the transactions +1. (run the RTL simulation) +1. Start to reconstruct state from the logfiles generated by SystemVerilog +1. After each change, run the coherency check +1. Check from the logfile whether the check was successful + +## Coherency check +By default, the coherency check is run on two occasions: +- After generating the initial state +- During state reconstruction - here the check is run after each change in cache to detect the exact timestamp when coherency was lost. + +The check verifies the following conditions: +- A modified (i.e. different from main memory) cache line must not be in Exclusive state +- A modified cache line must be in either Owned or Modified state in one of the caches +- Cache line states must be compatible (e.g. one cache line in both Modified and Shared states is not allowed) + +The checking is implemented in a very robust way (all cache entries are checked at each timestamp), so, unless someone implements a more optimized algorithm, for larger cache sizes the check can take an unbearably long time. Thus, it is recommended to keep the memory and cache sizes around the same size as what is provided by default. It also makes sense because smaller cache and memory sizes generate more snoop traffic.
diff --git a/test/vip/SV_README.md b/test/vip/SV_README.md new file mode 100644 index 0000000..40c2486 --- /dev/null +++ b/test/vip/SV_README.md @@ -0,0 +1,84 @@ +# Open-source ACE verification IP + +These IPs target the AXI4 ACE specification, specifically issue E. + +## Limitations +- Does not test exclusive access +- Does not test cache maintenance operations +- Does not test barriers +- Does not test DVMs +- Does not support snoop filtering +- Does not support ACE-Lite + +## Transaction coverage +The cache model currently generates the following ACE transactions: + - ReadUnique + - ReadShared + - CleanUnique + - WriteUnique + - WriteLineUnique + - WriteBack + +## ACE + +### ace_agent +Collects all ACE-related components. + +### ace_beat_types +Collects the datatypes for ACE transactions. + +### ace_driver +Component which receives ACE transactions in a mailbox (AW, W and AR channels) and drives the ACE interface signals accordingly. It also drives the signals to receive responses (R and B), but does not collect them. + +### ace_monitor +Component which monitors the ACE interface signals and detects received B or R responses. Puts the responses in a mailbox. + +### ace_sequencer +Contains the abstract class `ace_sequencer` and two classes `ace_rand_sequencer` and `ace_mbox_sequencer` which extend the abstract class. In this project, `ace_mbox_sequencer` is used, but the other one can also be used for a standalone testbench without coherency-checking. + +`ace_mbox_sequencer` receives ACE transactions in a mailbox and sends them to `ace_driver`. + +## Snoop + +### snoop_agent +Collects all snoop-channel related components. + +### snoop_beat_types +Collects the datatypes for snoop transactions, i.e. AC/CD/CR + +### snoop_driver +Component which receives snoop responses in a mailbox and drives the snoop interface signals (CR and CD). It also drives the ready signal of the AC channel, toggling it randomly.
+ +### snoop_monitor +Component which monitors the snoop interface's AC channel and puts the received transaction into a mailbox. + +### snoop_sequencer +Receives AC transactions in a mailbox and converts them into cache requests. Requests the cache for data. Receives the response, and puts the CR and possible CD responses into a mailbox. + +This class also contains functions to generate a random response instead of requesting the cache. + +## Cache + +### cache_beat_types +Collects the datatypes for cache and memory transactions. These are in a custom format. + +### cache_scoreboard +Contains the behavioral model for an n-way set-associative cache. It has two sources of requests - cache requests from `cache_sequencer` and snoop requests from `snoop_sequencer`. These are not arbitrated in any way, as it is an interconnect requirement that a cache line must not see a snoop to the same cache line during an outstanding transaction. + +A large part of the cache model's job is to generate the correct ACE transactions for a specific situation. For example, a non-cached write generates a WriteUnique transaction, while a cached write would generate a ReadUnique transaction for the allocation of the cache line. + +The cache can be initialized from a file. The Python part of this framework handles the generation of a randomized, cache-coherent initial state. + +All cache modifications are logged in a file, both for debugging purposes and to reconstruct the cache state later in the Python domain for the coherency check. + +### cache_sequencer +Generates cache requests from a transaction file which is generated by the Python scripts. + +### cache_top_agent +Collects all verification components. This is the module to instantiate in the testbench. + +### mem_logger +Connects to `axi_sim_mem`. Logs changes in memory to a file for both debugging purposes and to reconstruct the state in Python. + +### mem_sequencer +Receives memory requests in a mailbox and converts them into ACE requests. Puts them into a mailbox.
diff --git a/test/vip/ace/ace_agent.svh b/test/vip/ace/ace_agent.svh new file mode 100644 index 0000000..933d8bc --- /dev/null +++ b/test/vip/ace/ace_agent.svh @@ -0,0 +1,112 @@ +`ifndef _ACE_TEST_PKG +*** INCLUDED IN ace_test_pkg *** +`endif +class ace_agent #( // Bundles an ace_driver, an ace_mbox_sequencer and an ace_monitor around one ACE bus handle + /// Address width + parameter AW = 32, + /// Data width + parameter DW = 32, + /// ID width + parameter IW = 8 , + /// User width + parameter UW = 1, + /// Stimuli application time + parameter time TA = 0ns, + /// Stimuli test time + parameter time TT = 0ns, + /// ACE bus interface type + parameter type ace_bus_t = logic, + /// Clock interface type + parameter type clk_if_t = logic, + parameter type aw_beat_t = logic, + parameter type w_beat_t = logic, + parameter type ar_beat_t = logic, + parameter type r_beat_t = logic, + parameter type b_beat_t = logic +); + // Internal mailboxes, passed to both ace_seq and ace_drv in new() + mailbox #(aw_beat_t) i_aw_mbx = new; + mailbox #(w_beat_t) i_w_mbx = new; + mailbox #(ar_beat_t) i_ar_mbx = new; + // Mailbox handles supplied by the caller through new() + mailbox #(aw_beat_t) aw_mbx; + mailbox #(w_beat_t) w_mbx; + mailbox #(ar_beat_t) ar_mbx; + mailbox #(r_beat_t) r_mbx; + mailbox #(b_beat_t) b_mbx; + + ace_bus_t ace; + clk_if_t clk_if; + + ace_driver #( + .AW(AW), .DW(DW), .IW(IW), + .UW(UW), .TA(TA), .TT(TT), + .ace_bus_t(ace_bus_t), + .aw_beat_t(aw_beat_t), + .ar_beat_t(ar_beat_t), + .w_beat_t(w_beat_t) + ) ace_drv; + + ace_mbox_sequencer #( + .AW(AW), .IW(IW), .UW(UW), .DW(DW), + .aw_beat_t(aw_beat_t), + .ar_beat_t(ar_beat_t), + .w_beat_t(w_beat_t), + .RAND_WAIT(0) + ) ace_seq; + + ace_monitor #( + .TA(TA), .TT(TT), + .ace_bus_t(ace_bus_t), + .ar_beat_t(ar_beat_t), + .r_beat_t(r_beat_t), + .b_beat_t(b_beat_t) + ) ace_mon; + // Stores the handles and constructs driver, sequencer and monitor + function new( + ace_bus_t ace, + clk_if_t clk_if, + mailbox #(aw_beat_t) aw_mbx, + mailbox #(w_beat_t) w_mbx, + mailbox #(ar_beat_t) ar_mbx, + mailbox #(r_beat_t) r_mbx, + mailbox #(b_beat_t) b_mbx + ); + this.ace = ace; + this.clk_if = clk_if; + + this.aw_mbx = aw_mbx; + this.w_mbx = w_mbx; + this.ar_mbx = ar_mbx; + this.r_mbx = r_mbx; + this.b_mbx =
b_mbx; + // The driver only sees the internal mailboxes; the sequencer gets internal and caller mailboxes; the monitor gets the caller's AR, R and B mailboxes + this.ace_drv = new( + this.ace, this.i_aw_mbx, + this.i_w_mbx, this.i_ar_mbx + ); + this.ace_seq = new( + this.clk_if, this.i_aw_mbx, + this.i_w_mbx, this.i_ar_mbx, + this.aw_mbx, this.w_mbx, + this.ar_mbx + ); + this.ace_mon = new( + this.ace, this.ar_mbx, + this.r_mbx, this.b_mbx + ); + endfunction + // Only the driver is reset here + task reset; + this.ace_drv.reset(); + endtask + // Runs driver, sequencer and monitor in parallel; fork/join returns only when all three finish + task run; + fork + this.ace_drv.run(); + this.ace_seq.run(); + this.ace_mon.run(); + join + endtask + +endclass diff --git a/test/vip/ace/ace_beat_types.svh b/test/vip/ace/ace_beat_types.svh new file mode 100644 index 0000000..bf47e21 --- /dev/null +++ b/test/vip/ace/ace_beat_types.svh @@ -0,0 +1,87 @@ +`ifndef _ACE_TEST_PKG +*** INCLUDED IN ace_test_pkg *** +`endif +/// The data transferred on a beat on the AW/AR channels. +class ace_ax_beat #( + parameter AW = 32, + parameter IW = 8 , + parameter UW = 1, + parameter SNP_W = 4 +); + rand logic [IW-1:0] id = '0; + rand logic [AW-1:0] addr = '0; + logic [7:0] len = '0; + logic [2:0] size = '0; + logic [1:0] burst = '0; + logic lock = '0; + logic [3:0] cache = '0; + logic [2:0] prot = '0; + rand logic [3:0] qos = '0; + logic [3:0] region = '0; + rand logic [UW-1:0] user = '0; + rand logic [1:0] bar = '0; + rand logic [1:0] domain = '0; + rand logic [SNP_W-1:0] snoop = '0; +endclass +// AW beat: ace_ax_beat with a 3-bit snoop field, plus atop and awunique +class ace_aw_beat #( + parameter AW = 32, + parameter IW = 8 , + parameter UW = 1 +) extends ace_ax_beat #( + .AW(AW), .IW(IW), .UW(UW), .SNP_W(3) +); + logic [5:0] atop = '0; + rand logic awunique = '0; +endclass +// AR beat: ace_ax_beat with a 4-bit snoop field +class ace_ar_beat #( + parameter AW = 32, + parameter IW = 8 , + parameter UW = 1 +) extends ace_ax_beat #( + .AW(AW), .IW(IW), .UW(UW), .SNP_W(4) +); +endclass +// Combined AW/AR beat: 4-bit snoop field plus the atop and awunique fields that ace_aw_beat adds +class ace_ax_comb_beat #( + parameter AW = 32, + parameter IW = 8 , + parameter UW = 1 +) extends ace_ax_beat #( + .AW(AW), .IW(IW), .UW(UW), .SNP_W(4) +); + logic [5:0] atop = '0; + rand logic awunique = '0; +endclass +// R-channel beat; resp uses ace_pkg::rresp_t +class ace_r_beat #( + parameter DW = 32, + parameter IW = 8 , + parameter UW = 1
+); + rand logic [IW-1:0] id = '0; + rand logic [DW-1:0] data = '0; + ace_pkg::rresp_t resp = '0; + logic last = '0; + rand logic [UW-1:0] user = '0; +endclass +// W-channel beat; strb has one bit per data byte (DW/8) +class ace_w_beat #( + parameter DW = 32, + parameter UW = 1 +); + rand logic [DW-1:0] data = '0; + rand logic [DW/8-1:0] strb = '0; + logic last = '0; + rand logic [UW-1:0] user = '0; +endclass +// B-channel beat +class ace_b_beat #( + parameter IW = 8, + parameter UW = 1 +); + rand logic [IW-1:0] id = '0; + logic [1:0] resp = '0; + rand logic [UW-1:0] user = '0; +endclass \ No newline at end of file diff --git a/test/vip/ace/ace_driver.svh b/test/vip/ace/ace_driver.svh new file mode 100644 index 0000000..003ef90 --- /dev/null +++ b/test/vip/ace/ace_driver.svh @@ -0,0 +1,236 @@ +`ifndef _ACE_TEST_PKG +*** INCLUDED IN ace_test_pkg *** +`endif +class ace_driver #( + parameter AW = 32, + parameter DW = 32, + parameter IW = 8 , + parameter UW = 1, + parameter time TA = 0ns, // stimuli application time + parameter time TT = 0ns, // stimuli test time + parameter type ace_bus_t = logic, + parameter type aw_beat_t = logic, + parameter type ar_beat_t = logic, + parameter type w_beat_t = logic +); + aw_beat_t aw_txn; + ar_beat_t ar_txn; + w_beat_t w_txn; + + ace_bus_t ace; + + mailbox #(aw_beat_t) aw_mbx; + mailbox #(w_beat_t) w_mbx; + mailbox #(ar_beat_t) ar_mbx; + + function new( + ace_bus_t ace, + mailbox #(aw_beat_t) aw_mbx, + mailbox #(w_beat_t) w_mbx, + mailbox #(ar_beat_t) ar_mbx + ); + this.ace = ace; + + this.aw_mbx = aw_mbx; + this.ar_mbx = ar_mbx; + this.w_mbx = w_mbx; + endfunction + + task cycle_start; + #TT; + endtask + + task cycle_end; + @(posedge ace.clk_i); + endtask + + task run(); + cycle_end(); + fork + forever begin + if (aw_mbx.try_get(aw_txn)) send_aw(aw_txn); + else cycle_end(); + end + forever begin + if (w_mbx.try_get(w_txn)) send_w(w_txn); + else cycle_end(); + end + forever begin + if (ar_mbx.try_get(ar_txn)) send_ar(ar_txn); + else cycle_end(); + end + forever recv_r(); + forever recv_b(); + join +
endtask + + task reset(); + ace.aw_id <= '0; + ace.aw_addr <= '0; + ace.aw_len <= '0; + ace.aw_size <= '0; + ace.aw_burst <= '0; + ace.aw_lock <= '0; + ace.aw_cache <= '0; + ace.aw_prot <= '0; + ace.aw_qos <= '0; + ace.aw_region <= '0; + ace.aw_atop <= '0; + ace.aw_user <= '0; + ace.aw_valid <= '0; + ace.aw_snoop <= '0; + ace.aw_bar <= '0; + ace.aw_domain <= '0; + ace.aw_awunique <= '0; + ace.w_data <= '0; + ace.w_strb <= '0; + ace.w_last <= '0; + ace.w_user <= '0; + ace.w_valid <= '0; + ace.b_ready <= '0; + ace.ar_id <= '0; + ace.ar_addr <= '0; + ace.ar_len <= '0; + ace.ar_size <= '0; + ace.ar_burst <= '0; + ace.ar_lock <= '0; + ace.ar_cache <= '0; + ace.ar_prot <= '0; + ace.ar_qos <= '0; + ace.ar_region <= '0; + ace.ar_user <= '0; + ace.ar_snoop <= '0; + ace.ar_bar <= '0; + ace.ar_domain <= '0; + ace.ar_valid <= '0; + ace.r_ready <= '0; + ace.wack <= '0; + ace.rack <= '0; + endtask + + task send_aw ( + input aw_beat_t beat + ); + ace.aw_id <= #TA beat.id; + ace.aw_addr <= #TA beat.addr; + ace.aw_len <= #TA beat.len; + ace.aw_size <= #TA beat.size; + ace.aw_burst <= #TA beat.burst; + ace.aw_lock <= #TA beat.lock; + ace.aw_cache <= #TA beat.cache; + ace.aw_prot <= #TA beat.prot; + ace.aw_qos <= #TA beat.qos; + ace.aw_region <= #TA beat.region; + ace.aw_atop <= #TA beat.atop; + ace.aw_user <= #TA beat.user; + ace.aw_valid <= #TA 1; + ace.aw_snoop <= #TA beat.snoop; + ace.aw_bar <= #TA beat.bar; + ace.aw_domain <= #TA beat.domain; + ace.aw_awunique <= #TA beat.awunique; + cycle_start(); + while (ace.aw_ready != 1) begin cycle_end(); cycle_start(); end + cycle_end(); + ace.aw_id <= #TA '0; + ace.aw_addr <= #TA '0; + ace.aw_len <= #TA '0; + ace.aw_size <= #TA '0; + ace.aw_burst <= #TA '0; + ace.aw_lock <= #TA '0; + ace.aw_cache <= #TA '0; + ace.aw_prot <= #TA '0; + ace.aw_qos <= #TA '0; + ace.aw_region <= #TA '0; + ace.aw_atop <= #TA '0; + ace.aw_user <= #TA '0; + ace.aw_valid <= #TA 0; + ace.aw_snoop <= #TA '0; + ace.aw_bar <= #TA '0; + ace.aw_domain <= #TA '0; + 
ace.aw_awunique <= #TA 0; + endtask + + /// Issue a beat on the AR channel. + task send_ar ( + input ar_beat_t beat + ); + ace.ar_id <= #TA beat.id; + ace.ar_addr <= #TA beat.addr; + ace.ar_len <= #TA beat.len; + ace.ar_size <= #TA beat.size; + ace.ar_burst <= #TA beat.burst; + ace.ar_lock <= #TA beat.lock; + ace.ar_cache <= #TA beat.cache; + ace.ar_prot <= #TA beat.prot; + ace.ar_qos <= #TA beat.qos; + ace.ar_region <= #TA beat.region; + ace.ar_user <= #TA beat.user; + ace.ar_valid <= #TA 1; + ace.ar_snoop <= #TA beat.snoop; + ace.ar_bar <= #TA beat.bar; + ace.ar_domain <= #TA beat.domain; + cycle_start(); + while (ace.ar_ready != 1) begin cycle_end(); cycle_start(); end + cycle_end(); + ace.ar_id <= #TA '0; + ace.ar_addr <= #TA '0; + ace.ar_len <= #TA '0; + ace.ar_size <= #TA '0; + ace.ar_burst <= #TA '0; + ace.ar_lock <= #TA '0; + ace.ar_cache <= #TA '0; + ace.ar_prot <= #TA '0; + ace.ar_qos <= #TA '0; + ace.ar_region <= #TA '0; + ace.ar_user <= #TA '0; + ace.ar_valid <= #TA '0; + ace.ar_snoop <= #TA '0; + ace.ar_bar <= #TA '0; + ace.ar_domain <= #TA '0; + endtask + + /// Issue a beat on the W channel. + task send_w ( + input w_beat_t beat + ); + ace.w_data <= #TA beat.data; + ace.w_strb <= #TA beat.strb; + ace.w_last <= #TA beat.last; + ace.w_user <= #TA beat.user; + ace.w_valid <= #TA 1; + cycle_start(); + while (ace.w_ready != 1) begin cycle_end(); cycle_start(); end + cycle_end(); + ace.w_data <= #TA '0; + ace.w_strb <= #TA '0; + ace.w_last <= #TA '0; + ace.w_user <= #TA '0; + ace.w_valid <= #TA 0; + endtask + + task recv_r; + ace.r_ready <= #TA 1; + cycle_start(); + while (!(ace.r_valid && ace.r_last)) begin + cycle_end(); cycle_start(); + end + cycle_end(); + ace.r_ready <= #TA 0; + ace.rack <= #TA 1; + cycle_start(); cycle_end(); + ace.rack <= #TA 0; + endtask + + /// Wait for a beat on the B channel. 
+ task recv_b (); + ace.b_ready <= #TA 1; + cycle_start(); + while (ace.b_valid != 1) begin cycle_end(); cycle_start(); end + cycle_end(); + ace.b_ready <= #TA 0; + ace.wack <= #TA 1; + cycle_start(); cycle_end(); + ace.wack <= #TA 0; + endtask + +endclass diff --git a/test/vip/ace/ace_monitor.svh b/test/vip/ace/ace_monitor.svh new file mode 100644 index 0000000..18e1bed --- /dev/null +++ b/test/vip/ace/ace_monitor.svh @@ -0,0 +1,85 @@ +`ifndef _ACE_TEST_PKG +*** INCLUDED IN ace_test_pkg *** +`endif +class ace_monitor #( + parameter time TA = 0ns, // stimuli application time + parameter time TT = 0ns, // stimuli test time + parameter type ace_bus_t = logic, + parameter type ar_beat_t = logic, + parameter type r_beat_t = logic, + parameter type b_beat_t +); + + ace_bus_t ace; + + mailbox #(ar_beat_t) ar_mbx; + mailbox #(r_beat_t) r_mbx; + mailbox #(b_beat_t) b_mbx; + + task cycle_start; + #TT; + endtask + + task cycle_end; + @(posedge ace.clk_i); + endtask + + function new( + ace_bus_t ace, + mailbox #(ar_beat_t) ar_mbx, + mailbox #(r_beat_t) r_mbx, + mailbox #(b_beat_t) b_mbx + ); + this.ace = ace; + + this.ar_mbx = ar_mbx; + this.r_mbx = r_mbx; + this.b_mbx = b_mbx; + + endfunction + + task mon_r (output r_beat_t beat); + cycle_start(); + while (!(ace.r_valid && ace.r_ready)) begin cycle_end(); cycle_start(); end + beat = new; + beat.id = ace.r_id; + beat.data = ace.r_data; + beat.resp = ace.r_resp; + beat.last = ace.r_last; + beat.user = ace.r_user; + cycle_end(); + endtask + + task mon_b (output b_beat_t beat); + cycle_start(); + while (!(ace.b_valid && ace.b_ready)) begin cycle_end(); cycle_start(); end + beat = new; + beat.id = ace.b_id; + beat.resp = ace.b_resp; + beat.user = ace.b_user; + cycle_end(); + endtask + + task recv_rs; + forever begin + r_beat_t beat; + mon_r(beat); + r_mbx.put(beat); + end + endtask + + task recv_bs; + forever begin + b_beat_t beat; + mon_b(beat); + b_mbx.put(beat); + end + endtask + + task run; + fork + forever recv_rs(); + 
forever recv_bs(); + join + endtask +endclass diff --git a/test/vip/ace/ace_sequencer.svh b/test/vip/ace/ace_sequencer.svh new file mode 100644 index 0000000..ba20c02 --- /dev/null +++ b/test/vip/ace/ace_sequencer.svh @@ -0,0 +1,216 @@ +`ifndef _ACE_TEST_PKG +*** INCLUDED IN ace_test_pkg *** +`endif + +virtual class ace_sequencer #( + parameter AW = 32, + parameter DW = 32, + parameter IW = 8, + parameter UW = 1, + parameter type aw_beat_t = logic, + parameter type ar_beat_t = logic, + parameter type w_beat_t = logic +); + + // Input mailboxes + mailbox #(aw_beat_t) aw_mbx_i; + mailbox #(ar_beat_t) ar_mbx_i; + mailbox #(w_beat_t) w_mbx_i; + + // Output mailboxes + mailbox #(aw_beat_t) aw_mbx_o; + mailbox #(ar_beat_t) ar_mbx_o; + mailbox #(w_beat_t) w_mbx_o; + + // Clock interface needed for generating delays + // between sending transactions + virtual CLK_IF clk_if; + + function new( + virtual CLK_IF clk_if, + mailbox #(aw_beat_t) aw_mbx_o, + mailbox #(w_beat_t) w_mbx_o, + mailbox #(ar_beat_t) ar_mbx_o + ); + this.clk_if = clk_if; + + this.aw_mbx_o = aw_mbx_o; + this.ar_mbx_o = ar_mbx_o; + this.w_mbx_o = w_mbx_o; + + endfunction + + task automatic rand_wait(input int unsigned min, max); + int unsigned cycles; + cycles = $urandom_range(min, max); + repeat (cycles) begin + @(posedge this.clk_if.clk_i); + end + endtask + +endclass + +// Class which generates random sequences +class ace_rand_sequencer #( + parameter AW = 32, + parameter DW = 32, + parameter IW = 8, + parameter UW = 1, + parameter type aw_beat_t = logic, + parameter type ar_beat_t = logic, + parameter type w_beat_t = logic +) extends ace_sequencer #( + .AW(AW), .DW(DW), .IW(IW), .UW(UW), + .aw_beat_t(aw_beat_t), + .ar_beat_t(ar_beat_t), + .w_beat_t(w_beat_t) +); + + function aw_beat_t create_aw(); + aw_beat_t beat = new; + beat.addr = $urandom(); + beat.burst = axi_pkg::BURST_WRAP; + beat.size = $clog2(DW/8); // AxSIZE is log2 of the bytes per beat; DW is a bit width + beat.len = 3; + beat.id = '0; + beat.qos = '0; + beat.snoop =
ace_pkg::WriteUnique; + beat.bar = '0; + beat.domain = 'b1; + beat.awunique = '0; + return beat; + endfunction + + function ar_beat_t create_ar(); + ar_beat_t beat = new; + beat.addr = $urandom(); + beat.burst = axi_pkg::BURST_WRAP; + beat.size = $clog2(DW/8); // AxSIZE is log2 of the bytes per beat; DW is a bit width + beat.len = 3; + beat.id = '0; + beat.qos = '0; + beat.snoop = ace_pkg::ReadShared; + beat.bar = '0; + beat.domain = 'b1; + return beat; + endfunction + + function w_beat_t create_w(); + w_beat_t beat = new; + beat.data = $urandom(); + beat.strb = '1; + beat.last = '0; + return beat; + endfunction + + task send_aws(); + aw_beat_t aw_txn = new; + repeat (10) begin + rand_wait(2, 20); + aw_txn = create_aw(); + aw_mbx_o.put(aw_txn); + end + endtask + + task send_ws(); + w_beat_t w_txn = new; + repeat (10) begin + for (int i = 0; i < 4; i++) begin + rand_wait(2, 20); + w_txn = create_w(); + if (i == 3) w_txn.last = '1; + w_mbx_o.put(w_txn); + end + end + endtask + + task send_ars(); + ar_beat_t ar_txn = new; + repeat (10) begin + rand_wait(2, 20); + ar_txn = create_ar(); + ar_mbx_o.put(ar_txn); + end + endtask + + task run(); + send_aws(); + send_ws(); + send_ars(); + endtask + +endclass + +// Class which generates sequences when detected in +// input mailboxes +class ace_mbox_sequencer #( + parameter AW = 32, + parameter DW = 32, + parameter IW = 8, + parameter UW = 1, + parameter type aw_beat_t = logic, + parameter type ar_beat_t = logic, + parameter type w_beat_t = logic, + parameter RAND_WAIT = 1 +) extends ace_sequencer #( + .AW(AW), .DW(DW), .IW(IW), .UW(UW), + .aw_beat_t(aw_beat_t), + .ar_beat_t(ar_beat_t), + .w_beat_t(w_beat_t) +); + + function new( + virtual CLK_IF clk_if, + mailbox #(aw_beat_t) aw_mbx_o, + mailbox #(w_beat_t) w_mbx_o, + mailbox #(ar_beat_t) ar_mbx_o, + mailbox #(aw_beat_t) aw_mbx_i, + mailbox #(w_beat_t) w_mbx_i, + mailbox #(ar_beat_t) ar_mbx_i + ); + super.new(clk_if, aw_mbx_o, w_mbx_o, ar_mbx_o); + this.aw_mbx_i = aw_mbx_i; + this.ar_mbx_i = ar_mbx_i; + this.w_mbx_i = w_mbx_i; +
endfunction + + task wait_for_aws; + aw_beat_t aw_beat; + forever begin + aw_mbx_i.get(aw_beat); + if (RAND_WAIT) rand_wait(2, 20); + aw_mbx_o.put(aw_beat); + end + endtask + + task wait_for_ars; + ar_beat_t ar_beat; + forever begin + ar_mbx_i.get(ar_beat); + if (RAND_WAIT) rand_wait(2, 20); + ar_mbx_o.put(ar_beat); + end + endtask + + task wait_for_ws; + w_beat_t w_beat; + forever begin + w_mbx_i.get(w_beat); + if (RAND_WAIT) rand_wait(2, 20); + w_mbx_o.put(w_beat); + end + endtask + + task gen_txns_from_mbox; + fork + wait_for_aws(); + wait_for_ws(); + wait_for_ars(); + join + endtask + + task run(); + gen_txns_from_mbox(); + endtask + +endclass \ No newline at end of file diff --git a/test/vip/ace_test_pkg.sv b/test/vip/ace_test_pkg.sv new file mode 100644 index 0000000..424c69b --- /dev/null +++ b/test/vip/ace_test_pkg.sv @@ -0,0 +1,8 @@ +package ace_test_pkg; +`define _ACE_TEST_PKG +`include "ace/ace_beat_types.svh" +`include "ace/ace_driver.svh" +`include "ace/ace_monitor.svh" +`include "ace/ace_sequencer.svh" +`include "ace/ace_agent.svh" +endpackage \ No newline at end of file diff --git a/test/vip/cache/cache_beat_types.svh b/test/vip/cache/cache_beat_types.svh new file mode 100644 index 0000000..791dc42 --- /dev/null +++ b/test/vip/cache/cache_beat_types.svh @@ -0,0 +1,47 @@ +`ifndef _CACHE_TEST_PKG +*** INCLUDED IN cache_test_pkg *** +`endif + + +// Cache Requester operations +localparam int REQ_LOAD = 5'b00000; +localparam int REQ_STORE = 5'b00001; + +// Cache Memory operations +localparam int MEM_READ = 3'b000; +localparam int MEM_WRITE = 3'b001; +//localparam int MEM_ATOMIC = 3'b010; + +/// Datatype to orchestrate cache read and write requests +class cache_req; + int unsigned addr = 0; + logic [7:0] data_q[$]; + int unsigned op = REQ_LOAD; + bit cached = 0; + int unsigned shareability = 0; + int unsigned size = 0; + int unsigned timestamp = 0; +endclass + +/// Datatype to orchestrate cache lookups between +/// cache sequencer and cache scoreboard 
+class cache_resp; + logic [7:0] data_q[$]; +endclass + +class mem_req; + int unsigned addr = 0; + int unsigned len = 0; + int unsigned size = 0; + int unsigned op = MEM_READ; + logic [7:0] data_q[$]; + int unsigned cacheable = 0; + ace_pkg::arsnoop_t read_snoop_op = ace_pkg::ReadShared; + ace_pkg::awsnoop_t write_snoop_op = ace_pkg::WriteBack; +endclass + +class mem_resp; + logic [7:0] data_q[$]; + bit is_shared = 0; + bit pass_dirty = 0; +endclass diff --git a/test/vip/cache/cache_scoreboard.svh b/test/vip/cache/cache_scoreboard.svh new file mode 100644 index 0000000..8a8c42b --- /dev/null +++ b/test/vip/cache/cache_scoreboard.svh @@ -0,0 +1,575 @@ +`ifndef _CACHE_TEST_PKG +*** INCLUDED IN cache_test_pkg *** +`endif +class cache_scoreboard #( + /// Address space + parameter int AW = 32, + /// Width of the memory bus + parameter int DW = 32, + /// Width of one cache word + parameter int WORD_WIDTH = 0, + /// How many words per cache line + parameter int CACHELINE_WORDS = 0, + /// How many ways per set + parameter int WAYS = 0, + /// How many sets + parameter int SETS = 0, + /// Clock interface type + parameter type clk_if_t = logic +); + + localparam int BYTES_PER_WORD = DW / 8; + localparam int CACHELINE_BYTES = CACHELINE_WORDS * WORD_WIDTH / 8; + localparam int BLOCK_OFFSET_BITS = $clog2(CACHELINE_BYTES); + localparam int INDEX_BITS = $clog2(SETS); + localparam int TAG_BITS = AW - BLOCK_OFFSET_BITS - INDEX_BITS; + + localparam int VALID_IDX = 0; + localparam int SHARD_IDX = 1; + localparam int DIRTY_IDX = 2; + + int INDEX = -1; + + typedef logic [TAG_BITS-1:0] tag_t; + typedef logic [AW-1:0] addr_t; + typedef logic [7:0] byte_t; + typedef logic [2:0] status_t; + typedef logic [$clog2(WAYS)-1:0] lru_rank_t; + typedef logic [INDEX_BITS-1:0] idx_t; + + // Data structure for carrying cache request information + // It also monitors all cache modifications so that they can + // be executed at once and logged easily. 
+ typedef struct { + // Cache hit + logic hit; + // Status of the old cache line + status_t status; + // Way index for hit or replacement + int way; + // Set index of the old cache line + idx_t idx; + // Tag of the old cache line + tag_t tag; + // Cacheline-aligned address of the old cache line + addr_t addr; + // Cacheline-aligned address of the new cache line + addr_t new_addr; + // Byte index within the cache line + logic [BLOCK_OFFSET_BITS-1:0] byte_idx; + // New cache line to be stored + byte_t new_cline [CACHELINE_BYTES]; + // New status for the cache line + status_t new_status; + // New tag for the cache line + tag_t new_tag; + } tag_resp_t; + + byte_t data_q[SETS][WAYS][CACHELINE_BYTES]; // Cache data + status_t status_q[SETS][WAYS]; // Cache state + tag_t tag_q[SETS][WAYS]; // Cache tag + lru_rank_t lru_rank_q[SETS][WAYS]; // LRU ranks + + // Semaphore to ensure only one process accesses the cache at a time + // The two processes are cache requests and snoop requests + // TODO: figure the critical point where using this is necessary + // ATM it is not used + semaphore cache_lookup_sem; + + // Interface to provide simulation clock + clk_if_t clk_if; + + string state_file; + logic first_write = '1; + + // Mailboxes for cache requests + mailbox #(cache_req) cache_req_mbx; + mailbox #(cache_resp) cache_resp_mbx; + // Mailboxes for snoop requests + mailbox #(cache_snoop_req) snoop_req_mbx; + mailbox #(cache_snoop_resp) snoop_resp_mbx; + // Mailboxes for memory requests + mailbox #(mem_req) mem_req_mbx; + mailbox #(mem_resp) mem_resp_mbx; + + function new( + clk_if_t clk_if, + mailbox #(cache_req) cache_req_mbx, + mailbox #(cache_resp) cache_resp_mbx, + mailbox #(cache_snoop_req) snoop_req_mbx, + mailbox #(cache_snoop_resp) snoop_resp_mbx, + mailbox #(mem_req) mem_req_mbx, + mailbox #(mem_resp) mem_resp_mbx, + string state_file, + int index + ); + this.clk_if = clk_if; + this.cache_req_mbx = cache_req_mbx; + this.cache_resp_mbx = cache_resp_mbx; + 
this.snoop_req_mbx = snoop_req_mbx; + this.snoop_resp_mbx = snoop_resp_mbx; + this.mem_req_mbx = mem_req_mbx; + this.mem_resp_mbx = mem_resp_mbx; + this.state_file = state_file; + this.INDEX = index; + + this.cache_lookup_sem = new(1); + endfunction + + function void init_data_mem_from_file( + string fname + ); + $readmemh(fname, data_q); + endfunction + + function void init_tag_mem_from_file( + string fname + ); + $readmemh(fname, tag_q); + endfunction + + function void init_status_from_file( + string fname + ); + // Initialize all to zeros + for (int set = 0; set < SETS; set++) begin + for (int way = 0; way < WAYS; way++) begin + status_q[set][way] = '0; + lru_rank_q[set][way] = '0; + end + end + // Read initial values from file + $readmemb(fname, status_q); + endfunction + + function void init_mem_from_file( + string data_fname, + string tag_fname, + string status_fname + ); + init_data_mem_from_file(data_fname); + init_tag_mem_from_file(tag_fname); + init_status_from_file(status_fname); + endfunction + + function automatic void log_state_change( + bit initiator, + int unsigned addr, + int unsigned set, + int unsigned way, + tag_t new_tag, + status_t new_status, + byte_t new_data[CACHELINE_BYTES], + bit modify + ); + int fd; + if (first_write) fd = $fopen(this.state_file, "w"); + else fd = $fopen(this.state_file, "a"); + first_write = 0; + $fwrite(fd, "TIME:%0t ADDR:%x INITIATOR:%0d", $time, addr, initiator); + if (modify) begin + $fwrite(fd, " SET:%0d WAY:%0d TAG:%x STATUS:%b DATA:[", + set, way, new_tag, new_status); + for (int i = 0; i < CACHELINE_BYTES; i++) begin + if (i == 0) + $fwrite(fd, "%x", new_data[i]); + else + $fwrite(fd, ",%x", new_data[i]); + end + $fwrite(fd, "]"); + end + $fwrite(fd, "\n"); + $fclose(fd); + endfunction + + // Atomic function for all cache writes + // Cache state is saved optionally + // NO OTHER FUNCTION SHOULD MODIFY THE CACHE + // initiator = 1 when "core" modifies the cache + // initiator = 0 when cache is modified by 
snooping + function automatic void modify_cache( + tag_resp_t info, bit initiator, bit modify + ); + if (modify) begin + data_q[info.idx][info.way] = info.new_cline; + status_q[info.idx][info.way] = info.new_status; + tag_q[info.idx][info.way] = info.new_tag; + update_lru(info); + end + log_state_change( + initiator, + info.new_addr, + info.idx, + info.way, + tag_q[info.idx][info.way], + status_q[info.idx][info.way], + data_q[info.idx][info.way], + modify + ); + endfunction + + function automatic void update_lru(tag_resp_t info); + for (int way = 0; way < WAYS; way++) begin + if (way == info.way) begin + lru_rank_q[info.idx][way] = WAYS-1; + end else begin + if (lru_rank_q[info.idx][way] != '0) begin + lru_rank_q[info.idx][way]--; + end + end + end + endfunction + + function automatic tag_resp_t read_and_compare_tag(addr_t addr); + tag_resp_t resp; + tag_t lu_tag; + status_t status; + logic hit = '0; + logic invalid_found = '0; + int way; + int i; + idx_t idx = addr[BLOCK_OFFSET_BITS+INDEX_BITS-1:BLOCK_OFFSET_BITS]; + tag_t tag = addr[AW-1:AW-TAG_BITS]; + for (int i = 0; i < WAYS; i++) begin + lu_tag = tag_q[idx][i]; + if (!status_q[idx][i][VALID_IDX]) begin + way = i; + invalid_found = '1; + end else if (!invalid_found && lru_rank_q[idx][i] == 0) begin + // Least recently used + way = i; + end + if (tag == lu_tag && status_q[idx][i][VALID_IDX]) begin + way = i; + hit = 'b1; + break; + end + end + resp.hit = hit; + resp.idx = idx; + resp.way = way; + resp.status = status_q[idx][way]; + resp.tag = tag_q[idx][way]; + resp.addr = {tag_q[idx][way], idx, {BLOCK_OFFSET_BITS{1'b0}}}; + resp.byte_idx = addr[BLOCK_OFFSET_BITS-1:0]; + resp.new_addr = {addr[AW-1:BLOCK_OFFSET_BITS], {BLOCK_OFFSET_BITS{1'b0}}}; + resp.new_tag = tag; + resp.new_status = status_q[idx][way]; + resp.new_cline = data_q[idx][way]; + return resp; + endfunction + + function automatic cache_resp cache_read(tag_resp_t info, cache_req req); + int unsigned n_bytes = 1 << req.size; + cache_resp resp = new; 
+ logic [BLOCK_OFFSET_BITS-1:0] byte_idx = info.byte_idx; + for (int i = 0; i < n_bytes; i++) begin + resp.data_q.push_back(data_q[info.idx][info.way][byte_idx]); + byte_idx++; + end + return resp; + endfunction + + function automatic void cache_write( + ref tag_resp_t info, ref byte_t data_q[$] + ); + logic [BLOCK_OFFSET_BITS-1:0] byte_idx = info.byte_idx; + while (data_q.size() > 0) begin + info.new_cline[byte_idx] = data_q.pop_front(); + byte_idx++; + end + endfunction + + function automatic void cache_evict(ref tag_resp_t info); + info.new_status[VALID_IDX] = 1'b0; + endfunction + + + function automatic mem_req gen_write_back(tag_resp_t info); + mem_req mem_req = new; + mem_req.size = $clog2(BYTES_PER_WORD); + mem_req.len = CACHELINE_WORDS - 1; + mem_req.addr = info.addr; + mem_req.op = MEM_WRITE; + mem_req.write_snoop_op = ace_pkg::WriteBack; + for (int i = 0; i < CACHELINE_WORDS; i++) begin + for (int j = 0; j < BYTES_PER_WORD; j++) begin + mem_req.data_q.push_back( + data_q[info.idx][info.way][i*BYTES_PER_WORD+j]); + end + end + return mem_req; + endfunction + + function automatic mem_req gen_read_allocate(tag_resp_t info, cache_req req); + mem_req mem_req = new; + mem_req.size = $clog2(BYTES_PER_WORD); + mem_req.len = CACHELINE_WORDS - 1; + mem_req.addr = info.new_addr; + mem_req.op = MEM_READ; + mem_req.cacheable = '1; + if (req.op == REQ_STORE) begin + mem_req.read_snoop_op = ace_pkg::ReadUnique; + end else begin + mem_req.read_snoop_op = ace_pkg::ReadShared; + end + return mem_req; + endfunction + + function automatic mem_req gen_clean_unique(tag_resp_t info); + mem_req mem_req = new; + mem_req.size = $clog2(BYTES_PER_WORD); + mem_req.len = CACHELINE_WORDS - 1; + mem_req.addr = info.new_addr; + mem_req.op = MEM_READ; + mem_req.cacheable = '1; + mem_req.read_snoop_op = ace_pkg::CleanUnique; + return mem_req; + endfunction + + function automatic mem_req gen_write_line_unique(tag_resp_t info, cache_req req); + // Merge with write word + mem_req mem_req = 
new; + logic [BLOCK_OFFSET_BITS-1:0] byte_idx = info.byte_idx; + mem_req.size = $clog2(BYTES_PER_WORD); + mem_req.len = CACHELINE_WORDS - 1; + mem_req.addr = info.new_addr; + mem_req.op = MEM_WRITE; + mem_req.cacheable = '1; + mem_req.write_snoop_op = ace_pkg::WriteLineUnique; + for (int i = 0; i < CACHELINE_WORDS; i++) begin + for (int j = 0; j < BYTES_PER_WORD; j++) begin + mem_req.data_q.push_back( + info.new_cline[i*BYTES_PER_WORD+j]); // pending line (cached bytes + store data), not the stale data_q copy + end + end + return mem_req; + endfunction + + function automatic mem_req gen_write_unique(cache_req req); + mem_req mem_req = new; + mem_req.size = $clog2(BYTES_PER_WORD); + mem_req.len = 0; + mem_req.addr = req.addr; + mem_req.op = MEM_WRITE; + mem_req.cacheable = '1; + mem_req.write_snoop_op = ace_pkg::WriteUnique; + for (int i = 0; i < BYTES_PER_WORD; i++) begin + mem_req.data_q.push_back(req.data_q.pop_front()); + end + return mem_req; + endfunction + + function automatic void allocate(mem_req req, mem_resp resp, ref tag_resp_t info); + info.new_status[DIRTY_IDX] = resp.pass_dirty; + info.new_status[SHARD_IDX] = resp.is_shared; + info.new_status[VALID_IDX] = 1'b1; + // Cache line allocations are always cacheline-aligned: fill from byte 0 and leave info.byte_idx intact for the caller + foreach (resp.data_q[i]) info.new_cline[i] = resp.data_q[i]; + endfunction; + + task automatic snoop(input cache_snoop_req req, output cache_snoop_resp resp); + tag_resp_t tag_lu; + cache_resp cache_resp; + resp = new; + tag_lu = read_and_compare_tag(req.addr); + resp.snoop_resp.Error = 1'b0; + if (tag_lu.hit) begin + cache_req cache_req = new; + cache_req.addr = req.addr; + cache_req.size = $clog2(CACHELINE_BYTES); + cache_resp = cache_read(tag_lu, cache_req); + resp.snoop_resp.WasUnique = !tag_lu.status[SHARD_IDX]; + while (cache_resp.data_q.size() > 0) begin + logic [7:0] data = cache_resp.data_q.pop_front(); + resp.data_q.push_back(data); + end + case (req.snoop_op) + ace_pkg::ReadOnce: begin + resp.snoop_resp.DataTransfer = 1'b1; + resp.snoop_resp.IsShared = 1'b1; + resp.snoop_resp.PassDirty = 1'b0;
+ end + ace_pkg::ReadClean, ace_pkg::ReadNotSharedDirty: begin + // recommended to pass clean + resp.snoop_resp.DataTransfer = 1'b1; + resp.snoop_resp.IsShared = 1'b1; + resp.snoop_resp.PassDirty = 1'b0; + tag_lu.new_status[SHARD_IDX] = 1'b1; + modify_cache(tag_lu, 0, 1); + end + ace_pkg::ReadShared: begin + // recommended to pass dirty + resp.snoop_resp.DataTransfer = 1'b1; + resp.snoop_resp.IsShared = 1'b1; + tag_lu.new_status[SHARD_IDX] = 1'b1; + resp.snoop_resp.PassDirty = tag_lu.status[DIRTY_IDX]; + tag_lu.new_status[DIRTY_IDX] = 1'b0; + modify_cache(tag_lu, 0, 1); + end + ace_pkg::ReadUnique: begin + // data transfer and invalidate + resp.snoop_resp.DataTransfer = 1'b1; + resp.snoop_resp.IsShared = 1'b0; + resp.snoop_resp.PassDirty = tag_lu.status[DIRTY_IDX]; + tag_lu.new_status[VALID_IDX] = 1'b0; + modify_cache(tag_lu, 0, 1); + end + ace_pkg::CleanInvalid: begin + // data transfer dirty and invalidate + resp.snoop_resp.DataTransfer = tag_lu.status[DIRTY_IDX]; + resp.snoop_resp.IsShared = 1'b0; + resp.snoop_resp.PassDirty = tag_lu.status[DIRTY_IDX]; + tag_lu.new_status[VALID_IDX] = 1'b0; + modify_cache(tag_lu, 0, 1); + end + ace_pkg::MakeInvalid: begin + // invalidate + resp.snoop_resp.DataTransfer = 1'b0; + resp.snoop_resp.IsShared = 1'b0; + resp.snoop_resp.PassDirty = 1'b0; + tag_lu.new_status[VALID_IDX] = 1'b0; + modify_cache(tag_lu, 0, 1); + end + ace_pkg::CleanShared: begin + // pass dirty + resp.snoop_resp.DataTransfer = tag_lu.status[DIRTY_IDX]; + resp.snoop_resp.IsShared = 1'b1; + resp.snoop_resp.PassDirty = tag_lu.status[DIRTY_IDX]; + tag_lu.new_status[DIRTY_IDX] = 1'b0; + tag_lu.new_status[SHARD_IDX] = 1'b1; + modify_cache(tag_lu, 0, 1); + end + default: $fatal(1, "Unsupported snoop op!"); + endcase + end else begin + resp.snoop_resp.WasUnique = 1'b0; + resp.snoop_resp.DataTransfer = 1'b0; + resp.snoop_resp.IsShared = 1'b0; + resp.snoop_resp.PassDirty = 1'b0; + end + endtask + + task automatic cache_fsm(input cache_req req, output cache_resp resp); 
bit cache_modified = 1; + tag_resp_t tag_lu; + mem_req mem_req = new; + mem_resp mem_resp; + resp = new; + mem_req.cacheable = '1; + //cache_lookup_sem.get(1); + tag_lu = read_and_compare_tag(req.addr); + if (tag_lu.hit) begin + if (req.op == REQ_LOAD) begin + resp = cache_read(tag_lu, req); + end else if (req.op == REQ_STORE) begin + if (req.cached && tag_lu.status[SHARD_IDX]) begin + // Make unique + mem_req = gen_clean_unique(tag_lu); + mem_req_mbx.put(mem_req); + mem_resp_mbx.get(mem_resp); + allocate(mem_req, mem_resp, tag_lu); + end + cache_write(tag_lu, req.data_q); + if (req.cached) begin + tag_lu.new_status[DIRTY_IDX] = 1'b1; + end else begin + mem_req = gen_write_line_unique(tag_lu, req); + mem_req_mbx.put(mem_req); + mem_resp_mbx.get(mem_resp); + cache_evict(tag_lu); + end + end else begin + $fatal(1, "Unsupported op"); // first $fatal argument is the finish_number, not the message + end + end else begin + if (req.cached) begin + if (tag_lu.status[DIRTY_IDX] && + tag_lu.status[VALID_IDX]) begin + // Generate write-back request + mem_req = gen_write_back(tag_lu); + // Send request and wait for response + mem_req_mbx.put(mem_req); + mem_resp_mbx.get(mem_resp); + end + // Generate read request for new cache line + mem_req = gen_read_allocate(tag_lu, req); + // Send request and wait for response + mem_req_mbx.put(mem_req); + mem_resp_mbx.get(mem_resp); + // Allocate cache line for the new entry + allocate(mem_req, mem_resp, tag_lu); + // Handle the initial cache request + if (req.op == REQ_LOAD) begin + resp = cache_read(tag_lu, req); + end else if (req.op == REQ_STORE) begin + cache_write(tag_lu, req.data_q); + tag_lu.new_status[DIRTY_IDX] = 1'b1; + end else begin + $fatal(1, "Unsupported op"); // first $fatal argument is the finish_number, not the message + end + end else begin + cache_modified = 0; + mem_req = gen_write_unique(req); + mem_req_mbx.put(mem_req); + mem_resp_mbx.get(mem_resp); + end + end + modify_cache(tag_lu, 1, cache_modified); + //cache_resp_mbx.put(resp); + //cache_lookup_sem.put(1); + endtask + + task recv_cache_req; + cache_req req; + cache_resp resp = new; +
cache_req_mbx.get(req); + @(posedge clk_if.clk_i); + cache_fsm(req, resp); + cache_resp_mbx.put(resp); + endtask + + task recv_snoop_req; + cache_snoop_req req; + cache_snoop_resp resp = new; + snoop_req_mbx.get(req); + snoop(req, resp); + snoop_resp_mbx.put(resp); + endtask + + // Handle one request per clock cycle + // Snooping gets priority + /* + task handle_reqs; + int snp_exists; + int c_req_exists; + cache_snoop_req snp_req; + cache_req c_req; + @(posedge clk_if.clk_i); + snp_exists = snoop_req_mbx.try_get(snp_req); + if (snp_exists != 0) begin + recv_snoop_req(snp_req); + end + c_req_exists = cache_req_mbx.try_get(c_req); + if (c_req_exists) begin + recv_cache_req(c_req); + end + endtask + */ + + task recv_cache_reqs; + forever recv_cache_req(); + endtask + + task recv_snoop_reqs; + forever recv_snoop_req(); + endtask + + task run; + fork + forever recv_cache_reqs(); + forever recv_snoop_reqs(); + join + endtask + +endclass diff --git a/test/vip/cache/cache_sequencer.svh b/test/vip/cache/cache_sequencer.svh new file mode 100644 index 0000000..c5bbf20 --- /dev/null +++ b/test/vip/cache/cache_sequencer.svh @@ -0,0 +1,151 @@ +`ifndef _CACHE_TEST_PKG +*** INCLUDED IN cache_test_pkg *** +`endif +class cache_sequencer #( + parameter int AW = 32, + parameter int DW = 32, + parameter type clk_if_t = logic +); + + mailbox #(cache_req) cache_req_mbx; + mailbox #(cache_resp) cache_resp_mbx; + + byte delimiter = " "; + string txn_file; + int unsigned txns_remaining; + int unsigned clk_cnt = 0; + + // Interface to provide simulation clock + clk_if_t clk_if; + + function new( + clk_if_t clk_if, + mailbox #(cache_req) cache_req_mbx, + mailbox #(cache_resp) cache_resp_mbx, + string txn_file + ); + this.clk_if = clk_if; + this.cache_req_mbx = cache_req_mbx; + this.cache_resp_mbx = cache_resp_mbx; + this.txn_file = txn_file; + endfunction + + function automatic int parse_op(string op); + if (op == "REQ_LOAD") return REQ_LOAD; + else if (op == "REQ_STORE") return REQ_STORE; + 
else $fatal(1, "Illegal operation type found"); + endfunction + + function automatic cache_req parse_txn(string line); + cache_req req = new; + logic [DW-1:0] word; + string op; + int size; + op = get_next_word(line); + req.op = parse_op(op); + req.addr = get_next_word(line).atohex(); + word = get_next_word(line).atohex(); + for (int i = 0; i < (DW / 8); i++) begin + req.data_q.push_back(word[i*8 +: 8]); + end + req.size = get_next_word(line).atoi(); + req.cached = get_next_word(line).atoi(); + req.shareability = get_next_word(line).atoi(); + req.timestamp = get_next_word(line).atoi(); + return req; + endfunction + + // Calculates the size of the next word until the delimiter + function automatic int get_next_word_size(string line); + byte char = ""; + int len, i; + len = line.len(); + for (i = 0; i < len; i++) begin + char = line[i]; + if (char == this.delimiter) break; + end + return i; + endfunction + + // Returns the next word and removes it from ``line`` + function automatic string get_next_word(ref string line); + int wsize; + string word; + int line_len = line.len(); + wsize = get_next_word_size(line); + word = line.substr(0, wsize - 1); + line = line.substr(wsize + 1, line_len - 1); + return word.substr(5, word.len()-1); + endfunction + + function automatic int get_n_transactions; + int fd, ret; + string line; + int rows = 0; + fd = $fopen(this.txn_file, "r"); + if (fd) begin + while (!$feof(fd)) begin + ret = $fgets(line, fd); + if (ret != 0 && line != "") rows++; // ret == 0: nothing was read (EOF/error), line may be stale + end + end else begin + $fatal(1, "Could not open file %s", txn_file); // first $fatal argument is the finish_number + end + $fclose(fd); + return rows; + endfunction + + task gen_txns_from_file; + int fd, ret; + string line; + cache_req cache_req; + fd = $fopen(this.txn_file, "r"); + if (fd) begin + while (!$feof(fd)) begin + int mbx_size; + ret = $fgets(line, fd); + if (ret != 0 && line != "") begin // same read guard as get_n_transactions so both agree on the row count + cache_req = parse_txn(line); + send_req(cache_req); + end + end + end else begin + $fatal(1, "Could not open file %s", txn_file); // first $fatal argument is the finish_number + end + $fclose(fd); + endtask + + task
send_req(input cache_req req); + while (req.timestamp > clk_cnt) begin + @(posedge clk_if.clk_i); + end + cache_req_mbx.put(req); + endtask + + task recv_resps; + cache_resp cache_resp; + cache_resp_mbx.get(cache_resp); + txns_remaining--; + endtask + + task count_clocks; + forever begin + @(posedge clk_if.clk_i); + clk_cnt++; + end + endtask + + task run; + txns_remaining = get_n_transactions(); + fork + count_clocks(); + fork + gen_txns_from_file(); + while (txns_remaining != 0) begin + recv_resps(); + end + join + join_any + endtask + +endclass diff --git a/test/vip/cache/cache_top_agent.svh b/test/vip/cache/cache_top_agent.svh new file mode 100644 index 0000000..dbd1c38 --- /dev/null +++ b/test/vip/cache/cache_top_agent.svh @@ -0,0 +1,177 @@ +`ifndef _CACHE_TEST_PKG +*** INCLUDED IN cache_test_pkg *** +`endif +class cache_top_agent #( + /// Address width + parameter AW = 32, + /// Data width + parameter DW = 32, + /// Snoop address width + parameter AC_AW = 32, + /// Snoop data width + parameter CD_DW = 32, + /// ID width + parameter IW = 8 , + /// User width + parameter UW = 1, + /// Stimuli application time + parameter time TA = 0ns, + /// Stimuli test time + parameter time TT = 0ns, + /// How many words in a cache line + parameter CACHELINE_WORDS = 4, + /// Width of a cacheline word + parameter WORD_WIDTH = 32, + /// How many ways in the cache + parameter WAYS = 4, + /// How many sets in the cache + parameter SETS = 1024, + /// ACE bus interface type + parameter type ace_bus_t = logic, + /// Clock interface type + parameter type clk_if_t = logic, + /// Snoop bus interface type + parameter type snoop_bus_t = logic +); + ace_bus_t ace; + snoop_bus_t snoop; + clk_if_t clk_if; + + typedef ace_test_pkg::ace_aw_beat #( + .AW(AW), .IW(IW), .UW(UW) + ) aw_beat_t; + + typedef ace_test_pkg::ace_ar_beat #( + .AW(AW), .IW(IW), .UW(UW) + ) ar_beat_t; + + typedef ace_test_pkg::ace_r_beat #( + .DW(DW), .IW(IW), .UW(UW) + ) r_beat_t; + + typedef ace_test_pkg::ace_w_beat #( 
+ .DW(DW), .UW(UW) + ) w_beat_t; + + typedef ace_test_pkg::ace_b_beat #( + .IW(IW), .UW(UW) + ) b_beat_t; + + mailbox #(cache_req) cache_req_mbx = new(); + mailbox #(cache_resp) cache_resp_mbx = new(); + mailbox #(mem_req) mem_req_mbx = new(); + mailbox #(mem_resp) mem_resp_mbx = new(); + mailbox #(aw_beat_t) aw_mbx = new(); + mailbox #(w_beat_t) w_mbx = new(); + mailbox #(ar_beat_t) ar_mbx = new(); + mailbox #(r_beat_t) r_mbx = new(); + mailbox #(b_beat_t) b_mbx = new(); + mailbox #(cache_snoop_req) snoop_req_mbx = new(); + mailbox #(cache_snoop_resp) snoop_resp_mbx = new(); + + logic cache_seq_done = 1'b0; + + int unsigned os_cache_reqs = 0; + localparam int CachelineBytes = (CACHELINE_WORDS * WORD_WIDTH) / 8; + + ace_test_pkg::ace_agent #( + .AW(AW), .DW(DW), .IW(IW), .UW(UW), + .TA(TA), .TT(TT), + .ace_bus_t(ace_bus_t), + .clk_if_t(clk_if_t), + .aw_beat_t(aw_beat_t), + .w_beat_t(w_beat_t), + .ar_beat_t(ar_beat_t), + .r_beat_t(r_beat_t), + .b_beat_t(b_beat_t) + ) ace_agent; + + snoop_test_pkg::snoop_agent #( + .AW(AC_AW), .DW(CD_DW), + .TA(TA), .TT(TT), + .CACHELINE_BYTES(CachelineBytes), + .snoop_bus_t(snoop_bus_t), + .clk_if_t(clk_if_t) + ) snoop_agent; + + cache_scoreboard #( + .AW(AW), + .DW(DW), + .WORD_WIDTH(WORD_WIDTH), + .CACHELINE_WORDS(CACHELINE_WORDS), + .WAYS(WAYS), + .SETS(SETS), + .clk_if_t(clk_if_t) + ) cache_sb; + + cache_sequencer #( + .AW(AW), + .DW(DW), + .clk_if_t(clk_if_t) + ) cache_seq; + + mem_sequencer #( + .aw_beat_t(aw_beat_t), + .ar_beat_t(ar_beat_t), + .r_beat_t(r_beat_t), + .w_beat_t(w_beat_t), + .b_beat_t(b_beat_t) + ) mem_seq; + + function new( + ace_bus_t ace, + snoop_bus_t snoop, + clk_if_t clk_if, + string data_mem_file, + string tag_mem_file, + string status_file, + string txn_file, + string state_file, + int index + ); + this.ace = ace; + this.snoop = snoop; + this.clk_if = clk_if; + + this.ace_agent = new(this.ace, this.clk_if, this.aw_mbx, + this.w_mbx, this.ar_mbx, this.r_mbx, + this.b_mbx); + this.snoop_agent = 
// Start every component owned by this agent concurrently.
//
// All five run() tasks are forked in parallel and join_any releases the
// caller as soon as the first one returns; the remaining ones keep running
// in the background (they are not disabled here).
// NOTE(review): mem_sequencer::run forks two forever loops with a plain
// join and therefore never returns, while cache_sequencer::run returns once
// all of its transactions have been answered. The ACE agent, snoop agent
// and scoreboard run() tasks are defined elsewhere - presumably they are
// endless as well, which would make the cache sequencer the task that ends
// this call; confirm against their definitions.
task run;
    fork
        this.ace_agent.run();
        this.snoop_agent.run();
        this.cache_seq.run();
        this.cache_sb.run();
        this.mem_seq.run();
    join_any
endtask
// Monitor the memory write channel and log every observed write beat.
//
// On each rising clock edge of the monitor bus, a beat is logged when
// w_valid is set. When the bus reports beat index 0 (first beat of a
// burst) a TIME record is written first, followed by the per-byte
// ADDR/DATA records of the beat. This task never returns.
task recv_writes;
    int unsigned beat_count = 0;
    forever begin
        @(posedge mem_mon_bus.clk_i);
        if (mem_mon_bus.w_valid) begin
            beat_count = mem_mon_bus.w_beat_count;
            // A new burst starts: emit the timestamp exactly once per burst
            if (beat_count == 0) log_time();
            log_word(mem_mon_bus.w_addr, mem_mon_bus.w_data);
        end
    end
endtask
// Fetch one memory request from the request mailbox and translate it into
// channel beats.
//
// Blocks until a request is available. A MEM_WRITE request produces one AW
// beat followed by its W beats; a MEM_READ request produces one AR beat.
// Any other operation aborts the simulation.
task recv_mem_req;
    mem_req req;
    mem_req_mbx.get(req);
    if (req.op == MEM_WRITE) begin
        send_aw_beat(req);
        send_w_beats(req);
    end else if (req.op == MEM_READ) begin
        send_ar_beat(req);
    end else begin
        // The first argument of $fatal is the finish number, the message
        // comes second; passing the string first is not a valid call.
        $fatal(1, "Unsupported op!");
    end
endtask
// Continuously convert R and B channel beats into memory responses.
//
// Two endless collectors run in parallel: one assembles read responses
// from R beats, the other turns every B beat into a write response. Both
// put their result into mem_resp_mbx. This task never returns.
task send_mem_resps;
    fork
        forever recv_r_beats();
        // Exactly one terminating semicolon: a second one would be a null
        // statement and thus an additional (empty) forked process.
        forever recv_b_beats();
    join
endtask
@property
def transactions(self) -> List[CacheTransactionSequence]:
    """Per-cache transaction sequences, created on first access.

    One ``CacheTransactionSequence`` is built for each of the
    ``n_caches`` caches the first time the property is read; later
    reads return the same list.

    Raises:
        Exception: if no memory range has been registered yet.
    """
    if hasattr(self, '_transactions'):
        return self._transactions
    if not self.mem_ranges:
        raise Exception("Define self.mem_ranges!")
    # Publish the list first and fill it in place afterwards
    self._transactions = []
    self._transactions.extend(
        CacheTransactionSequence(self.aw, self.dw, self.mem_ranges)
        for _ in range(self.n_caches)
    )
    return self._transactions

@transactions.setter
def transactions(self, txns: List[CacheTransactionSequence]):
    """Replace the transaction sequences with a caller-provided list."""
    self._transactions = txns
def rand_index(self, n):
    """Return a random integer from 0 to n, both ends included."""
    return randint(0, n)

def rand_cache_index(self):
    """Return a uniformly distributed, valid cache index.

    The result lies in ``[0, n_caches - 1]`` so it can be used directly
    to index ``self.caches``. ``randint`` is inclusive on both ends,
    hence the ``- 1``.
    """
    return self.rand_index(self.n_caches - 1)

def rand_sharers(self, owner):
    """Return a per-cache list of booleans marking the sharers of a line.

    Args:
        owner: index of the cache that always holds the line.

    Returns:
        A list with ``n_caches`` entries. The entry of ``owner`` is
        always True; every other entry is True with probability 0.5.
    """
    # `or` short-circuits, so no random draw is spent on the owner
    return [
        idx == owner or self.rand_choice()
        for idx in range(self.n_caches)
    ]
def get_next_timestamp(self, files, cur_time):
    """Find the next timestamp at which a transaction is retired.

    Every file in ``files`` is scanned for its first row whose time is
    later than ``cur_time`` and which retires a transaction. A row
    consists of whitespace-separated ``KEY:value`` words; the keywords
    ``TIME:``, ``INITIATOR:`` and ``ADDR:`` are evaluated and must appear
    in this order. Words without a colon are ignored.

    Args:
        files: paths of the per-cache log files; the position in this
            list is the cache index.
        cur_time: only timestamps strictly greater than this are used.

    Returns:
        ``(finish, next_tstamp, addrs_w_same_tstamp)``.
        ``finish`` is True when no file holds a later timestamp (then
        ``next_tstamp`` is 0 and the list is empty).
        ``addrs_w_same_tstamp`` is a list of ``(idx, addr)`` giving the
        cache index that retires a transaction at ``next_tstamp`` and the
        address it retires.
    """
    # Some masters may have run out of transactions while others still
    # have some left, so candidates are tracked per file:
    # timestamp, retired address and master index share one position.
    timestamps = []
    addrs = []
    idxs = []
    for i, file in enumerate(files):
        with open(file, "r") as cache_file:
            for line in cache_file:
                time = None
                addr = None
                for word in line.split():
                    if ":" not in word:
                        # Not a KEY:value word, nothing to parse
                        continue
                    payload = word.split(":")[1]
                    if "TIME:" in word:
                        time = int(payload)
                    if "INITIATOR:" in word:
                        if not bool(int(payload)):
                            # Don't store the time if this row marks an
                            # outstanding transaction
                            time = None
                    if "ADDR:" in word:
                        addr = int(payload, 16)
                # Take only the first row of each file that retires a
                # transaction after cur_time
                if time and time > cur_time:
                    timestamps.append(time)
                    addrs.append(addr)
                    idxs.append(i)
                    break
    if not timestamps:
        return True, 0, []
    next_tstamp = min(timestamps)
    addrs_w_tstamp = [
        (idxs[k], addrs[k])
        for k, tstamp in enumerate(timestamps)
        if tstamp == next_tstamp
    ]
    return False, next_tstamp, addrs_w_tstamp
def check_coherency(self):
    """Check that caches and main memory are coherent.

    Only addresses that are both cached and shared are checked, and
    addresses with an outstanding transaction in any cache are skipped.

    Test cases:
    - Modified cache line must not be in Exclusive state
    - Modified cache line must have it somewhere in either Owned or
      Modified state
    - Cache line states must be compatible (e.g. Modified && Shared is
      not allowed)

    Returns:
        True if at least one violation was found, False otherwise.
    """
    logger.info("Starting coherency check")
    error = False
    debug = self.debug

    for mem_range in self.mem_ranges:
        for addr in range(
                mem_range.start_addr,
                mem_range.end_addr,
                self.cacheline_bytes):
            cached, shared = mem_range.get_addr_properties(addr)
            if not (shared and cached):
                # Currently only checking shared and cached regions
                continue

            # An address is outstanding when a snoop transaction has
            # modified a cache line, but the transaction itself has not
            # finished yet. Its state is transient, so skip it.
            if any(addr in cache.outstanding for cache in self.caches):
                logger.info("Skipping address due to an outstanding transaction")
                self.print_info(logging.INFO, addr=addr)
                continue

            cacheline = mem_range.get_data(addr, self.cacheline_bytes)
            # One (cache index, state, set, way) entry per cache that
            # holds the line. The cache index is stored explicitly so
            # that reports never depend on the position in this list.
            holders = []
            modified = False
            owner_found = False
            last_set = None

            for cache_idx, cache in enumerate(self.caches):
                hit, data, moesi, set_idx, way_idx = cache.get_addr(addr)
                last_set = set_idx
                if not hit:
                    continue
                logger.info("Cacheline found")
                self.print_info(logging.INFO, addr=addr, cache_idx=cache_idx,
                                state=moesi.state.name, set=set_idx, way=way_idx)
                if data != cacheline:
                    # The cached copy differs from main memory
                    if moesi.state != CachelineStateEnum.INVALID:
                        modified = True
                    if moesi.state == CachelineStateEnum.EXCLUSIVE:
                        logger.error("A modified cache line in Exclusive state")
                        self.print_info(logging.ERROR, addr=addr, cache_idx=cache_idx,
                                        state=moesi.state.name, set=set_idx, way=way_idx)
                        error = True
                        if debug: import pdb; pdb.set_trace()
                if moesi.state in \
                        [CachelineStateEnum.OWNED, CachelineStateEnum.MODIFIED]:
                    owner_found = True
                holders.append((cache_idx, moesi, set_idx, way_idx))

            if modified and not owner_found:
                error = True
                logger.error("A modified cache line without owner was found!")
                self.print_info(logging.ERROR, addr=addr, set=last_set)
                if debug: import pdb; pdb.set_trace()

            # Compare cacheline states pairwise. The compatibility
            # relation is symmetric, so each pair is checked (and
            # reported) only once.
            for a in range(len(holders)):
                for b in range(a + 1, len(holders)):
                    idx_a, state_a, set_a, way_a = holders[a]
                    idx_b, state_b, set_b, way_b = holders[b]
                    if state_a.check_compatibility(state_b.state):
                        continue
                    logger.error("Two cache lines in incompatible states!")
                    self.print_info(
                        logging.ERROR,
                        addr=addr,
                        cache_idx=(idx_a, idx_b),
                        state=(state_a.state.name, state_b.state.name),
                        set=(set_a, set_b),
                        way=(way_a, way_b)
                    )
                    error = True
                    if debug: import pdb; pdb.set_trace()
    logger.info("Coherency check finished")
    return error
end_addr=0x0010_0000 + )) + self.generate_random_memory() + self.create_transaction(n_cache=0, txn=CacheTransaction( + addr=0, + op=CacheReqOp.REQ_LOAD, + size=int(log2(self.dw)), + shareability=1, + cached=True, + time=10 + )) + self.create_transaction(n_cache=1, txn=CacheTransaction( + addr=0, + op=CacheReqOp.REQ_LOAD, + size=int(log2(self.dw)), + shareability=1, + cached=True, + time=10 + )) + self.save_state() + + +if __name__ == "__main__": + import argparse + from random import seed + import numpy as np + parser = argparse.ArgumentParser( + description=('Script to write data to a file' + 'based on address space.') + ) + parser.add_argument( + '--addr_width', + type=int, + help='AXI address width' + ) + parser.add_argument( + '--data_width', + type=int, + help='AXI data width' + ) + parser.add_argument( + '--word_width', + type=int, + help='Width of a word in the cache' + ) + parser.add_argument( + '--cacheline_words', + type=int, + help='Number of words in a cacheline' + ) + parser.add_argument( + '--ways', + type=int, + help='Number of ways in the cache' + ) + parser.add_argument( + '--sets', + type=int, + help='Number of sets in the cache' + ) + parser.add_argument( + '--n_caches', + type=int, + help='Number of cached masters in the test' + ) + parser.add_argument( + '--n_transactions', + type=int, + help='Number of transactions generated per cached master' + ) + parser.add_argument( + '--target_dir', + type=str, + help='Target directory for generated files' + ) + parser.add_argument( + '--seed', + type=int, + help="Seed for the simulation", + default=None, + nargs='?' + ) + parser.add_argument( + '--check', + action='store_true', + help="Check for coherency once prompted" + ) + parser.add_argument( + '--debug', + action='store_true', + help="Debug mode. During coherency checking, will open pdb when error is encountered." 
class StateBits(Enum):
    # Position of each flag inside a cache line status list
    VALID_IDX = 0
    SHARED_IDX = 1
    DIRTY_IDX = 2


class CachelineStateEnum(Enum):
    # MOESI states of a cache line
    MODIFIED = 0
    OWNED = 1
    EXCLUSIVE = 2
    SHARED = 3
    INVALID = 4


class CachelineState:
    """MOESI state of one cache line, convertible from/to status bits."""

    def __init__(self, state: CachelineStateEnum = CachelineStateEnum.INVALID):
        self.state = state

    def from_state_bits(self, state_bits: List[StateBits]):
        """Set ``self.state`` from a ``[valid, shared, dirty]`` list.

        Raises:
            Exception: if the valid flag is neither 0 nor truthy.
        """
        valid = state_bits[StateBits.VALID_IDX.value]
        if valid == 0:
            self.state = CachelineStateEnum.INVALID
            return
        shared = state_bits[StateBits.SHARED_IDX.value]
        dirty = state_bits[StateBits.DIRTY_IDX.value]
        if shared:
            # shared + dirty -> Owned, shared + clean -> Shared
            self.state = (CachelineStateEnum.OWNED if dirty
                          else CachelineStateEnum.SHARED)
            return
        if dirty:
            self.state = CachelineStateEnum.MODIFIED
            return
        if valid:
            self.state = CachelineStateEnum.EXCLUSIVE
            return
        raise Exception("Unexpected state")

    def get_state_bits(self):
        """Return the state as a ``[valid, shared, dirty]`` list of bools."""
        flags_per_state = (
            (CachelineStateEnum.MODIFIED,
             (StateBits.VALID_IDX, StateBits.DIRTY_IDX)),
            (CachelineStateEnum.OWNED,
             (StateBits.VALID_IDX, StateBits.SHARED_IDX, StateBits.DIRTY_IDX)),
            (CachelineStateEnum.EXCLUSIVE,
             (StateBits.VALID_IDX,)),
            (CachelineStateEnum.SHARED,
             (StateBits.VALID_IDX, StateBits.SHARED_IDX)),
        )
        bits = [False, False, False]
        for candidate, flags in flags_per_state:
            if self.state == candidate:
                for flag in flags:
                    bits[flag.value] = True
                break
        return bits

    def check_compatibility(self, other: CachelineStateEnum):
        """Return True if this state may coexist with ``other``.

        ``other`` is the state of the same line in a different cache.

        Raises:
            Exception: if ``self.state`` is not a known MOESI state.
        """
        own = self.state
        if own == CachelineStateEnum.INVALID:
            # An invalid line conflicts with nothing
            return True
        if own == CachelineStateEnum.SHARED:
            return other not in [CachelineStateEnum.EXCLUSIVE,
                                 CachelineStateEnum.MODIFIED]
        if own == CachelineStateEnum.OWNED:
            return other in [CachelineStateEnum.INVALID,
                             CachelineStateEnum.SHARED]
        if own in [CachelineStateEnum.MODIFIED,
                   CachelineStateEnum.EXCLUSIVE]:
            # Unique states tolerate only invalid copies elsewhere
            return other == CachelineStateEnum.INVALID
        raise Exception("Unexpected state")
def init_cache(self):
    """Allocate empty status, tag and data arrays for all sets and ways.

    Every way starts with status ``[False, False, False]``, tag 0 and a
    data line of ``cacheline_bytes`` zero bytes.
    """
    n_sets = self.sets
    n_ways = self.ways
    # Each inner list is built by its own expression so that no two
    # ways or sets end up sharing the same list object.
    self.cache_status = [
        [[False, False, False] for _ in range(n_ways)]
        for _ in range(n_sets)
    ]
    self.cache_tag = [[0 for _ in range(n_ways)] for _ in range(n_sets)]
    self.cache_data = [
        [self.cacheline_bytes * [0] for _ in range(n_ways)]
        for _ in range(n_sets)
    ]
def status_arr_to_int(self, bool_arr):
    """Pack a list of flags into an integer; element 0 becomes bit 0."""
    bits = ""
    # The last element is the most significant bit, so walk backwards
    for flag in reversed(bool_arr):
        bits += '1' if flag else '0'
    return int(bits, 2)
def clear_outstanding_addr(self, addr):
    """Remove ``addr`` from ``self.outstanding``.

    Returns True if the address was stored (and has now been removed),
    False if it was not present.
    """
    try:
        self.outstanding.remove(addr)
    except ValueError:
        return False
    return True


def reconstruct_state(self, file, start_time, end_time):
    """Replay a cache-state log into this object.

    Each line of ``file`` is a whitespace separated list of ``KEY:payload``
    tokens. Recognised keys are ``TIME``, ``INITIATOR``, ``ADDR``, ``SET``,
    ``WAY``, ``TAG``, ``STATUS`` and ``DATA``.

    * ``TIME``, ``SET``, ``WAY`` and ``INITIATOR`` are decimal integers.
    * ``ADDR`` and ``TAG`` are hexadecimal.
    * ``STATUS`` is a bit string; it is reversed on load.
    * ``DATA`` is a comma separated list of hex bytes, optionally wrapped in
      ``[]`` and containing no spaces.

    A row that carries every field except possibly ``ADDR`` updates
    ``cache_data``/``cache_tag``/``cache_status`` at ``[SET][WAY]``.
    A row with (at least) ``TIME`` and ``ADDR`` but an incomplete entry is a
    finished transaction that was not cached; it changes no cache line.
    In both cases the address is appended to ``self.outstanding`` unless the
    row is marked ``INITIATOR:1``.

    Rows with ``TIME <= start_time`` are skipped.

    Returns the first time stamp that is greater than ``end_time`` (that row
    is not applied), or None when the end of the file is reached first.

    Raises ValueError for a non-blank row that has neither a complete cache
    entry nor both ``TIME`` and ``ADDR``.
    """

    def parse_fields(line):
        # Tokens without a ':' carry no payload; ignore them instead of
        # failing with IndexError. As in the original parser only the text
        # between the first and second ':' is the payload.
        fields = {}
        for word in line.split():
            parts = word.split(":")
            if len(parts) >= 2:
                fields[parts[0]] = parts[1]
        return fields

    with open(file, "r") as state_file:
        for line_no, line in enumerate(state_file, start=1):
            if not line.strip():
                # Blank (e.g. trailing) lines carry no information.
                continue
            fields = parse_fields(line)

            time = int(fields["TIME"]) if "TIME" in fields else None
            addr = int(fields["ADDR"], 16) if "ADDR" in fields else None
            set_idx = int(fields["SET"]) if "SET" in fields else None
            way = int(fields["WAY"]) if "WAY" in fields else None
            tag = int(fields["TAG"], 16) if "TAG" in fields else None
            initiator = None
            if "INITIATOR" in fields:
                initiator = bool(int(fields["INITIATOR"]))
            status = None
            if "STATUS" in fields:
                status = [char == '1' for char in fields["STATUS"]]
                status.reverse()
            data = None
            if "DATA" in fields:
                data = [
                    int(x, 16)
                    for x in fields["DATA"].strip("[]").split(",")
                ]

            entry = (time, initiator, set_idx, way, tag, status, data)
            modify = all(value is not None for value in entry)
            if not modify and (time is None or addr is None):
                # Previously this dropped into pdb, which blocks any
                # non-interactive run and then failed on `None > end_time`.
                raise ValueError(
                    f"{file}:{line_no}: unexpected state row: {line.strip()!r}"
                )

            if time > end_time:
                return time
            if time <= start_time:
                continue
            if modify:
                self.cache_data[set_idx][way] = data
                self.cache_tag[set_idx][way] = tag
                self.cache_status[set_idx][way] = status
            if not initiator:
                self.outstanding.append(addr)
class MemoryRange:
    """A half-open address range ``[start_addr, end_addr)`` with byte data.

    A range may additionally own a *cached* and a *shared* sub-range, each
    itself a ``MemoryRange`` without data.
    """

    def __init__(
        self,
        start_addr: int,
        end_addr: int,
        cached: bool = False,
        shared: bool = False,
    ):
        """
        Parameters
        ==========
        start_addr      Start address (inclusive).
        end_addr        End address (exclusive).
        cached          Set whole range as cached.
        shared          Set whole range as shared.
        """
        # Start address of the range (inclusive)
        self.start_addr = start_addr
        # End address of the range (non-inclusive)
        self.end_addr = end_addr
        # Data, one entry per byte; filled by init_random_mem/init_zero_mem
        self.mem_data = []
        # Subrange that is cached
        self.cached_region: "MemoryRange" = None
        # Subrange that is shared
        self.shared_region: "MemoryRange" = None

        if cached:
            self.set_cached_region(start_addr, end_addr)
        if shared:
            self.set_shared_region(start_addr, end_addr)

    def _contains(self, addr):
        """True when ``addr`` lies in ``[start_addr, end_addr)``."""
        return self.start_addr <= addr < self.end_addr

    def init_random_mem(self):
        """Fill the range with uniformly random bytes (numpy uint8 array)."""
        self.mem_data = np.random.randint(
            0, 256, size=(self.end_addr - self.start_addr),
            dtype=np.uint8)

    def init_zero_mem(self):
        """Fill the range with zero bytes (numpy uint8 array)."""
        # np.zeros takes `shape`, not `size`; the old keyword raised TypeError.
        self.mem_data = np.zeros(
            shape=(self.end_addr - self.start_addr),
            dtype=np.uint8)

    def set_cached_region(self, start_addr, end_addr):
        """Mark ``[start_addr, end_addr)`` as the cached sub-range."""
        self.cached_region = MemoryRange(
            start_addr=start_addr,
            end_addr=end_addr
        )

    def set_shared_region(self, start_addr, end_addr):
        """Mark ``[start_addr, end_addr)`` as the shared sub-range."""
        self.shared_region = MemoryRange(
            start_addr=start_addr,
            end_addr=end_addr
        )

    def get_addr_properties(self, addr):
        """Get whether address is cached and/or shared.

        Returns (cached, shared). The end address of a region is exclusive,
        matching what the ``get_rand_*`` helpers can generate.
        """
        cached = (
            self.cached_region is not None
            and self.cached_region._contains(addr)
        )
        shared = (
            self.shared_region is not None
            and self.shared_region._contains(addr)
        )
        return cached, shared

    def get_rand_addr(self, step):
        """Random address ``start_addr + k*step`` inside the range."""
        return randrange(self.start_addr, self.end_addr, step)

    def get_rand_cached_addr(self, step):
        """Random address inside the cached sub-range (must be set)."""
        return randrange(
            self.cached_region.start_addr,
            self.cached_region.end_addr,
            step)

    def get_rand_shared_addr(self, step):
        """Random address inside the shared sub-range (must be set)."""
        return randrange(
            self.shared_region.start_addr,
            self.shared_region.end_addr,
            step)

    def get_rand_cached_shared_addr(self, step):
        """Random address inside the overlap of cached and shared regions.

        Raises Exception when a region is missing or the overlap is empty.
        """
        if self.cached_region is None or self.shared_region is None:
            raise Exception("Either cached or shared region is missing")
        start_addr = max(self.cached_region.start_addr,
                         self.shared_region.start_addr)
        end_addr = min(self.cached_region.end_addr,
                       self.shared_region.end_addr)
        # `<=`: regions that merely touch have an empty half-open overlap
        # and would otherwise surface as a bare ValueError from randrange.
        if end_addr <= start_addr:
            raise Exception("No overlapping shared and cached regions")
        return randrange(start_addr, end_addr, step)

    def get_data(self, addr, len):
        """Return an array of length len, consisting of bytes"""
        start_idx = addr - self.start_addr
        end_idx = start_idx + len
        # Index element-wise (not slice) so reading past the data still
        # raises IndexError instead of silently returning fewer bytes.
        return [self.mem_data[i] for i in range(start_idx, end_idx)]


class MemoryState:
    """Model of main memory as a list of ``MemoryRange`` objects."""

    def __init__(
        self,
        mem_ranges: List[MemoryRange] = None
    ):
        # A literal `[]` default would be shared by every instance created
        # without arguments. A caller-supplied list is stored as-is.
        self.mem_ranges: List[MemoryRange] = (
            [] if mem_ranges is None else mem_ranges
        )

    def gen_rand_mem(self):
        """Fill every range with random bytes."""
        for mem_range in self.mem_ranges:
            mem_range.init_random_mem()

    def store(self, addr, data):
        """Write one byte ``data`` at ``addr`` in every range containing it.

        Raises Exception when no range contains ``addr``.
        """
        range_found = False
        for mem_range in self.mem_ranges:
            # end_addr is exclusive; `<=` here indexed one past the data
            # (or hit the wrong range when two ranges are adjacent).
            if mem_range.start_addr <= addr < mem_range.end_addr:
                range_found = True
                mem_range.mem_data[addr - mem_range.start_addr] = data
        if not range_found:
            raise Exception("Provided an address outside the memory range(s)")

    def reconstruct_mem(
        self,
        file,
        start_time,
        end_time
    ) -> int:
        """
        Updates memory given the transactions in a file.

        Each line holds whitespace separated ``TIME:<dec>``, ``ADDR:<hex>``
        and ``DATA:<hex>`` tokens. Lines without ``TIME`` are always applied;
        lines with ``TIME < start_time`` are skipped.

        Returns the time stamp that was the first one that was not updated
        (the first ``TIME > end_time``), or None if the file ends first.
        Raises Exception when a line has only one of ``ADDR``/``DATA``.
        """
        with open(file, "r") as mem_file:
            for line in mem_file:
                time = -1  # -1 == "no time stamp on this line"
                addr = None
                data = None
                for word in line.split():
                    parts = word.split(":")
                    if len(parts) < 2:
                        # Token without payload; previously an IndexError.
                        continue
                    key, payload = parts[0], parts[1]
                    if key == "TIME":
                        time = int(payload)
                    elif key == "ADDR":
                        addr = int(payload, 16)
                    elif key == "DATA":
                        data = int(payload, 16)
                if time > end_time:
                    return time
                if (time < start_time) and time != -1:
                    continue
                if (addr is not None) and (data is not None):
                    self.store(addr, data)
                elif (addr is not None) or (data is not None):
                    raise Exception(
                        "Either data or addr provided without the other"
                    )

    def save_mem(
        self,
        file="main_mem.mem",
    ):
        """Dump all ranges as hex bytes, four per row, after an ``@0`` line.

        NOTE(review): ranges are written back-to-back under a single ``@0``
        regardless of their start_addr — confirm this is what the loader
        expects.
        """
        bytes_per_row = 4
        with open(file, "w") as mem_file:
            mem_file.write("@0\n")
            for mem_range in self.mem_ranges:
                n_bytes = mem_range.end_addr - mem_range.start_addr
                for offset in range(0, n_bytes, bytes_per_row):
                    # Clamp the final row so a length that is not a multiple
                    # of four no longer indexes past the end of the range.
                    row_end = min(offset + bytes_per_row, n_bytes)
                    row = [mem_range.mem_data[i]
                           for i in range(offset, row_end)]
                    mem_file.write(
                        " ".join(f"{byte:2x}" for byte in row) + "\n"
                    )
class CacheTransactionSequence:
    """An ordered list of cache transactions that can be randomised and
    written to a stimulus file, one transaction per line."""

    def __init__(
        self,
        addr_width,
        data_width,
        mem_ranges: "List[MemoryRange]"
    ):
        """
        Parameters
        ==========
        addr_width
            Address width in bits (sets the zero-padding of ADDR).
        data_width
            Data width in bits (sets the zero-padding of DATA and the
            address alignment of random transactions).
        mem_ranges
            Ranges to draw random addresses from. Each must provide
            ``get_rand_cached_shared_addr(step)``.
        """
        # Annotations that name classes from other modules are written as
        # strings so they are not evaluated when this class is defined.
        self.aw = addr_width
        self.dw = data_width
        self.mem_ranges = mem_ranges
        self.sequence: "list[CacheTransaction]" = []
        self.separator = " "

    def add_transaction(self, txn: "CacheTransaction"):
        """Append one transaction to the sequence."""
        self.sequence.append(txn)

    def generate_rand_sequence(self, n_transactions):
        """Append ``n_transactions`` random transactions."""
        for _ in range(n_transactions):
            self.sequence.append(self.gen_rand_transaction())

    def get_rand_mem_range(self):
        """Pick one of the configured memory ranges at random."""
        return choice(self.mem_ranges)

    def get_rand_data(self):
        """Random value in ``[0, 2**data_width - 1]``."""
        # randrange's stop is exclusive: `(1 << dw) - 1` as the stop could
        # never produce the all-ones value.
        return randrange(0, 1 << self.dw)

    def gen_rand_transaction(self):
        """Create one random, inner-shareable load or store.

        Loads are always cached; stores are uncached with 20% probability.
        """
        mem_range = self.get_rand_mem_range()
        addr = mem_range.get_rand_cached_shared_addr(self.dw // 8)
        shareability = 1
        op = choice(list(CacheReqOp))
        if op == CacheReqOp.REQ_LOAD:
            cached = True
        else:
            # 20% chance to generate uncached request
            cached = choices([True, False], weights=[80, 20], k=1)[0]
        data = self.get_rand_data()
        # NOTE(review): CacheTransaction documents `size` as AXI AxSIZE,
        # i.e. log2 of the number of BYTES, but this is log2 of the data
        # width in BITS (64-bit -> 6, not 3). Left unchanged because the
        # consumer of the SIZE field is not visible here — confirm.
        size = int(log2(self.dw))
        return CacheTransaction(
            addr=addr,
            op=op,
            data=data,
            size=size,
            shareability=shareability,
            cached=cached,
            time=0
        )

    def generate_file(self, filename):
        """Write the sequence to ``filename``.

        One line per transaction, lines separated by ``\\n`` with no
        trailing newline. ADDR/DATA are zero-padded hex of ``aw // 4`` and
        ``dw // 4`` digits.
        """
        lines = [
            f"OPER:{txn.op.name} ADDR:{txn.addr:0{self.aw // 4}x} "
            f"DATA:{txn.data:0{self.dw // 4}x} SIZE:{txn.size} "
            f"CACH:{int(txn.cached)} SHAR:{txn.shareability} TIME:{txn.time}"
            for txn in self.sequence
        ]
        with open(filename, "w") as file:
            file.write("\n".join(lines))


if __name__ == "__main__":
    import argparse

    def _int_any_base(text):
        """Accept decimal or 0x-prefixed integers on the command line."""
        return int(text, 0)

    parser = argparse.ArgumentParser(
        description=('Script to generate random transactions')
    )
    parser.add_argument(
        'file',
        type=str,
        help='The filename where data will be written'
    )
    parser.add_argument(
        'n',
        type=int,
        help='Number of transactions'
    )
    # The sequence cannot be built without widths and an address range;
    # previously the constructor was called with no arguments and the script
    # always failed with TypeError. The defaults below are placeholders —
    # pass values matching the testbench configuration.
    parser.add_argument('--addr-width', type=int, default=64,
                        help='Address width in bits')
    parser.add_argument('--data-width', type=int, default=64,
                        help='Data width in bits')
    parser.add_argument('--start-addr', type=_int_any_base, default=0,
                        help='Start of the cached+shared range (inclusive)')
    parser.add_argument('--end-addr', type=_int_any_base, default=0x1000,
                        help='End of the cached+shared range (exclusive)')
    args = parser.parse_args()
    cts = CacheTransactionSequence(
        args.addr_width,
        args.data_width,
        [MemoryRange(args.start_addr, args.end_addr,
                     cached=True, shared=True)]
    )
    cts.generate_rand_sequence(args.n)
    cts.generate_file(args.file)
/// The data transferred on a beat on the AC channel.
/// Only `ac_addr` is declared `rand`; `ac_snoop` and `ac_prot` keep their
/// assigned values when the object is randomized.
class ace_ac_beat #(
    /// Width of the snoop address in bits
    parameter AW = 32
);
    rand logic [AW-1:0] ac_addr  = '0;
    logic [3:0]         ac_snoop = '0;
    logic [2:0]         ac_prot  = '0;
endclass

/// The data transferred on a beat on the CR channel.
class ace_cr_beat;
    // Snoop response; snoop_sequencer reads its `DataTransfer` field to
    // decide whether CD beats follow.
    ace_pkg::crresp_t cr_resp = '0;
endclass

/// The data transferred on a beat on the CD channel.
class ace_cd_beat #(
    /// Width of the snoop data bus in bits
    parameter DW = 32
);
    rand logic [DW-1:0] cd_data = '0;
    // Set on the final beat of a cache-line transfer
    logic               cd_last = '0;
endclass

// Snoop request to a cache
// Filled by snoop_sequencer from an observed AC beat.
class cache_snoop_req;
    // NOTE(review): `int unsigned` is 32 bits, while ace_ac_beat::ac_addr is
    // AW bits wide; snoop_sequencer assigns ac_addr into this field, so an
    // address wider than 32 bits would be truncated — confirm AW <= 32 or
    // widen this field.
    int unsigned addr = 0;
    ace_pkg::acsnoop_t snoop_op = '0;
endclass

// Snoop response from a cache
class cache_snoop_resp;
    // Cache-line contents, one byte per queue entry; snoop_sequencer pops
    // from the front and packs the bytes into CD beats starting at bit 0.
    logic [7:0] data_q[$];
    ace_pkg::crresp_t snoop_resp = '0;
endclass
/// Passive monitor for the AC (snoop address) channel.
/// Every accepted AC handshake is copied into an `ac_beat_t` object and put
/// into `ac_mbx`. The monitor never drives the bus; ACREADY is toggled by
/// snoop_driver.
class snoop_monitor #(
    parameter time TA = 0ns, // stimuli application time
    parameter time TT = 0ns, // stimuli test time
    parameter type snoop_bus_t = logic,
    parameter type ac_beat_t = logic,
    parameter type cd_beat_t = logic,
    parameter type cr_beat_t = logic
);

    // Handle to the observed snoop bus (must expose clk_i and the ac_* signals)
    snoop_bus_t snoop;

    // Mailbox for AC transactions
    // Should be created and connected outside
    mailbox #(ac_beat_t) ac_mbx;

    /// Wait for the test time TT before sampling.
    task cycle_start;
        #TT;
    endtask

    /// Wait for the next rising clock edge.
    task cycle_end;
        @(posedge snoop.clk_i);
    endtask

    /// Bind the monitor to a bus handle and its output mailbox.
    function new (
        snoop_bus_t snoop,
        mailbox #(ac_beat_t) ac_mbx
    );
        this.snoop = snoop;
        this.ac_mbx = ac_mbx;
    endfunction

    /// Capture exactly one AC beat.
    /// Samples TT after each clock edge until ac_valid and ac_ready are both
    /// high, copies addr/snoop/prot, publishes the beat and then consumes the
    /// clock edge that completes the handshake.
    task mon_ac;
        // A fresh object per beat: the mailbox stores the handle, so reusing
        // one object would overwrite beats that are still queued.
        ac_beat_t ac_txn = new;
        cycle_start();
        while (!(snoop.ac_valid && snoop.ac_ready)) begin cycle_end(); cycle_start(); end
        ac_txn.ac_addr = snoop.ac_addr;
        ac_txn.ac_snoop = snoop.ac_snoop;
        ac_txn.ac_prot = snoop.ac_prot;
        ac_mbx.put(ac_txn);
        cycle_end();
    endtask

    /// Monitor forever; intended to be forked by snoop_agent::run.
    task run;
        forever mon_ac();
    endtask

endclass
/// Verification components for the snoop (AC/CR/CD) channels.
/// The class headers below refuse to compile unless `_SNOOP_TEST_PKG` is
/// defined, i.e. unless they are included from this package.
package snoop_test_pkg;
    `define _SNOOP_TEST_PKG

    /// Snoop transaction types as encoded on ACSNOOP[3:0].
    /// The values must match the bus encoding (they were previously numbered
    /// 0..9 sequentially, which is only correct for the first four entries).
    /// They now agree with the ARSNOOP values used by ReadSnoopType in
    /// test/vip/python/transactions.py.
    typedef enum logic [3:0] {
        AC_READ_ONCE             = 4'b0000,
        AC_READ_SHARED           = 4'b0001,
        AC_READ_CLEAN            = 4'b0010,
        AC_READ_NOT_SHARED_DIRTY = 4'b0011,
        AC_READ_UNIQUE           = 4'b0111,
        AC_CLEAN_SHARED          = 4'b1000,
        AC_CLEAN_INVALID         = 4'b1001,
        AC_MAKE_INVALID          = 4'b1101,
        AC_DVM_COMPLETE          = 4'b1110,
        AC_DVM_MESSAGE           = 4'b1111
    } ac_snoop_e;

    // Order matters: beat types first, agent last (it instantiates the
    // driver, monitor and sequencer).
    `include "snoop/snoop_beat_types.svh"
    `include "snoop/snoop_driver.svh"
    `include "snoop/snoop_monitor.svh"
    `include "snoop/snoop_sequencer.svh"
    `include "snoop/snoop_agent.svh"


endpackage
b/src/ccu/ace_ccu_snoop_pipe.sv @@ -107,7 +107,7 @@ module ace_ccu_snoop_pipe logic st0_ar_accepts_shared; logic st0_pipe_valid; logic st0_pipe_ready; - logic st0_stall; + logic st0_hazard; logic st0_replay; slv_idx_t st0_slv_idx; st1_t st0_pipe; @@ -159,10 +159,13 @@ module ace_ccu_snoop_pipe // Stage 0 // {{{ - always_comb begin : stall_comb - st0_stall = 1'b1; + always_comb begin : hazard_comb + st0_hazard = 1'b1; st0_replay = 1'b0; + st0_tracker_check_o = 1'b0; + st0_replay_check_o = 1'b0; + if (!st0_tracker_full_i && st0_pipe_ready) begin // Check if there is any conflict on nline or ID (tracker) st0_tracker_check_o = 1'b1; @@ -175,7 +178,7 @@ module ace_ccu_snoop_pipe // the downstream buffers end else begin // The write is clear to go - st0_stall = 1'b0; + st0_hazard = 1'b0; end end else begin // The AX originates from AR @@ -183,7 +186,7 @@ module ace_ccu_snoop_pipe st0_replay_check_o = !st0_ax_is_replay; if (!st0_tracker_check_hit_i && !st0_replay_hit_i) begin // No conflict is detected - st0_stall = 1'b0; + st0_hazard = 1'b0; end else if (CcuCfg.u.ReplayEn) begin // Reads are replayable // ID or nline conflict is avoided by putting the request on hold @@ -194,8 +197,8 @@ module ace_ccu_snoop_pipe end // Handshaking logic - assign st0_ac_valid_o = st0_stall ? 1'b0 : st0_ax_valid; - assign st0_ax_ready = st0_stall ? st0_replay : st0_ac_ready_i; + assign st0_ac_valid_o = st0_hazard ? 1'b0 : st0_ax_valid; + assign st0_ax_ready = st0_hazard ? 
st0_replay : st0_ac_ready_i; // Allocations assign st0_tracker_alloc_o = st0_ax_valid && st0_ax_ready && !st0_replay; assign st0_tracker_alloc_b_o = st0_ax_is_write || st0_ax.atop[axi_pkg::ATOP_R_RESP]; From 8e79f7612b7452c8911a0cdb94c511d488a95e0d Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 18 Jun 2025 12:59:28 +0200 Subject: [PATCH 040/109] ccu_snoop_pipe: add stall performance events --- src/ccu/ace_ccu_snoop_pipe.sv | 11 ++++++++++- src/ccu/ace_ccu_top.sv | 4 +++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/ccu/ace_ccu_snoop_pipe.sv b/src/ccu/ace_ccu_snoop_pipe.sv index 9530553..18b8360 100644 --- a/src/ccu/ace_ccu_snoop_pipe.sv +++ b/src/ccu/ace_ccu_snoop_pipe.sv @@ -80,7 +80,10 @@ module ace_ccu_snoop_pipe output logic st1_read_valid_o, input logic st1_read_ready_i, output logic st1_cd_ctrl_valid_o, - input logic st1_cd_ctrl_ready_i + input logic st1_cd_ctrl_ready_i, + + output logic evt_st0_stall_o, + output logic evt_st1_stall_o ); // Typedefs // {{{ @@ -335,4 +338,10 @@ module ace_ccu_snoop_pipe assign st1_ax_tid_o = st1.tid; // }}} + // Performance events + // {{{ + assign evt_st0_stall_o = st0_ax_valid && !st0_ax_ready; + assign evt_st1_stall_o = st1_valid && !st1_ready; + // }}} + endmodule diff --git a/src/ccu/ace_ccu_top.sv b/src/ccu/ace_ccu_top.sv index 298e124..89cc0a2 100644 --- a/src/ccu/ace_ccu_top.sv +++ b/src/ccu/ace_ccu_top.sv @@ -290,7 +290,9 @@ module ace_ccu_top .st1_read_valid_o (read_valid), .st1_read_ready_i (read_ready), .st1_cd_ctrl_valid_o (cd_ctrl_valid), - .st1_cd_ctrl_ready_i (cd_ctrl_ready) + .st1_cd_ctrl_ready_i (cd_ctrl_ready), + .evt_st0_stall_o (), + .evt_st1_stall_o () ); stream_fork_dynamic #( From 2c4704e59fe64d84bd302b329484b9aec00e51e9 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 18 Jun 2025 12:59:47 +0200 Subject: [PATCH 041/109] ccu_snoop_pipe: remove leftover assignments --- src/ccu/ace_ccu_snoop_pipe.sv | 6 ------ 1 file changed, 6 deletions(-) diff --git 
a/src/ccu/ace_ccu_snoop_pipe.sv b/src/ccu/ace_ccu_snoop_pipe.sv index 18b8360..f901b96 100644 --- a/src/ccu/ace_ccu_snoop_pipe.sv +++ b/src/ccu/ace_ccu_snoop_pipe.sv @@ -238,12 +238,6 @@ module ace_ccu_snoop_pipe tid: st0_tracker_alloc_tid_i, ax: st0_ax }; - - // Replay allocation fields - assign replay_alloc_acsnoop_o = st0_ax_acsnoop; - assign replay_alloc_accepts_dirty_o = st0_ar_accepts_dirty; - assign replay_alloc_accepts_dirty_shared_o = st0_ar_accepts_dirty_shared; - assign replay_alloc_accepts_shared_o = st0_ar_accepts_shared; // }}} // Stage 1 From 917a9a9825655bc95d17d4aed7a8962baf9c768e Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 18 Jun 2025 13:00:18 +0200 Subject: [PATCH 042/109] ccu_top: add comment about replay table --- src/ccu/ace_ccu_top.sv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ccu/ace_ccu_top.sv b/src/ccu/ace_ccu_top.sv index 89cc0a2..b83fc61 100644 --- a/src/ccu/ace_ccu_top.sv +++ b/src/ccu/ace_ccu_top.sv @@ -398,6 +398,8 @@ module ace_ccu_top // Replay table // {{{ if (CcuCfg.u.ReplayEn) begin : gen_replay + // TODO: implement replay table to put AR requests + // with address conflicts on hold $fatal(-1, "Replay table not yet implemented."); end else begin : gen_no_replay assign replay_full = 1'b0; From 6b1c50d17ddbe97c75d78bc4dea9c762bbfb5aef Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 18 Jun 2025 13:00:45 +0200 Subject: [PATCH 043/109] ccu_tracker: add comments about deallocation logic --- src/ccu/ace_ccu_tracker.sv | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/ccu/ace_ccu_tracker.sv b/src/ccu/ace_ccu_tracker.sv index 55888d9..2a0175e 100644 --- a/src/ccu/ace_ccu_tracker.sv +++ b/src/ccu/ace_ccu_tracker.sv @@ -110,6 +110,10 @@ module ace_ccu_tracker // Dealloc logic // {{{ + + // Deallocation logic has some complexity due to the need of handling the rack and wack signals + // from all master, which cannot be stalled and can arrive in parallel in the same cycle + // TODO: can this 
be simplified? for (genvar i = 0; i < CcuCfg.u.MaxTransactions; i++) begin : gen_dealloc slv_idx_t dealloc_slv_id; assign dealloc_slv_id = data_q[i].id[CcuCfg.AxiCcuIdWidth-1 : CcuCfg.u.AxiSlvIdWidth]; @@ -135,6 +139,8 @@ module ace_ccu_tracker logic wack_queue_push; logic rack_queue_push; + // Push an entry ID to the wack/rack queues if the dealloc response matches the ID of the transaction + // that is being deallocated assign wack_queue_push = dealloc_b_resp_i && data_q[wack_queue_wdata].id[CcuCfg.AxiCcuIdWidth-1 : CcuCfg.u.AxiSlvIdWidth] == CcuCfg.SlvPortIdxWidth'(i); assign rack_queue_push = dealloc_r_resp_i && data_q[rack_queue_wdata].id[CcuCfg.AxiCcuIdWidth-1 : CcuCfg.u.AxiSlvIdWidth] == CcuCfg.SlvPortIdxWidth'(i); From 5eac9e84581d0317873dd4e46cba2e5669fda782 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 18 Jun 2025 13:54:02 +0200 Subject: [PATCH 044/109] ccu_tracker: fix width in signal declaration --- src/ccu/ace_ccu_tracker.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ccu/ace_ccu_tracker.sv b/src/ccu/ace_ccu_tracker.sv index 2a0175e..e031080 100644 --- a/src/ccu/ace_ccu_tracker.sv +++ b/src/ccu/ace_ccu_tracker.sv @@ -85,8 +85,8 @@ module ace_ccu_tracker tid_t rack_queue_wdata; tid_t wack_queue_wdata; - tid_t [ CcuCfg.u.SlvPorts] rack_queue_rdata; - tid_t [ CcuCfg.u.SlvPorts] wack_queue_rdata; + tid_t [ CcuCfg.u.SlvPorts-1:0] rack_queue_rdata; + tid_t [ CcuCfg.u.SlvPorts-1:0] wack_queue_rdata; // }}} // Alloc logic From 5c45c032e7f0ce3824a1b38c8a428d4c1273a952 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 18 Jun 2025 13:54:31 +0200 Subject: [PATCH 045/109] ccu_snoop_pipe: fix formatting --- src/ccu/ace_ccu_snoop_pipe.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ccu/ace_ccu_snoop_pipe.sv b/src/ccu/ace_ccu_snoop_pipe.sv index f901b96..948e4b5 100644 --- a/src/ccu/ace_ccu_snoop_pipe.sv +++ b/src/ccu/ace_ccu_snoop_pipe.sv @@ -163,11 +163,11 @@ module 
ace_ccu_snoop_pipe // Stage 0 // {{{ always_comb begin : hazard_comb - st0_hazard = 1'b1; - st0_replay = 1'b0; + st0_hazard = 1'b1; + st0_replay = 1'b0; st0_tracker_check_o = 1'b0; - st0_replay_check_o = 1'b0; + st0_replay_check_o = 1'b0; if (!st0_tracker_full_i && st0_pipe_ready) begin // Check if there is any conflict on nline or ID (tracker) From cfe7c201dcd3a84c1c9eb84acba4a497c71ce58e Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 18 Jun 2025 19:19:58 +0200 Subject: [PATCH 046/109] ccu_snoop_pipe: fix missing signal declaration --- src/ccu/ace_ccu_snoop_pipe.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ccu/ace_ccu_snoop_pipe.sv b/src/ccu/ace_ccu_snoop_pipe.sv index 948e4b5..f9d6844 100644 --- a/src/ccu/ace_ccu_snoop_pipe.sv +++ b/src/ccu/ace_ccu_snoop_pipe.sv @@ -103,6 +103,7 @@ module ace_ccu_snoop_pipe ccu_ax_t st0_ax; logic st0_ax_valid; logic st0_ax_ready; + logic st0_ax_is_write; acsnoop_t st0_ax_acsnoop; axdomain_t st0_ax_domain; logic st0_ar_accepts_dirty; From 2990733b7a4b2921f8b6fce845976b5ad5ab7f05 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 19 Jun 2025 18:57:34 +0200 Subject: [PATCH 047/109] ace_pkg: fix `is_read_once` function --- src/ace_pkg.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ace_pkg.sv b/src/ace_pkg.sv index fd45f50..5d8a94a 100644 --- a/src/ace_pkg.sv +++ b/src/ace_pkg.sv @@ -176,7 +176,7 @@ package ace_pkg; endfunction function automatic logic is_read_once(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); - return (arbar0 == 1'b0 && ardomain inside {NonShareable} && arsnoop == ReadOnce); + return (arbar0 == 1'b0 && ardomain inside {InnerShareable, OuterShareable} && arsnoop == ReadOnce); endfunction function automatic logic is_read_shared(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); From af022791b90006683337c13d762a5d1c2754f054 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 20 Jun 2025 09:57:22 +0200 Subject: [PATCH 048/109] 
ccu_snoop_pipe: fix tracker allocation during ATOPs --- src/ccu/ace_ccu_snoop_pipe.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ccu/ace_ccu_snoop_pipe.sv b/src/ccu/ace_ccu_snoop_pipe.sv index f9d6844..fcaf302 100644 --- a/src/ccu/ace_ccu_snoop_pipe.sv +++ b/src/ccu/ace_ccu_snoop_pipe.sv @@ -205,8 +205,8 @@ module ace_ccu_snoop_pipe assign st0_ax_ready = st0_hazard ? st0_replay : st0_ac_ready_i; // Allocations assign st0_tracker_alloc_o = st0_ax_valid && st0_ax_ready && !st0_replay; - assign st0_tracker_alloc_b_o = st0_ax_is_write || st0_ax.atop[axi_pkg::ATOP_R_RESP]; - assign st0_tracker_alloc_r_o = !st0_ax_is_write; + assign st0_tracker_alloc_b_o = st0_ax_is_write; + assign st0_tracker_alloc_r_o = !st0_ax_is_write || st0_ax.atop[axi_pkg::ATOP_R_RESP]; assign st0_tracker_alloc_nline_o = st0_ax.addr[CcuCfg.CachelineBytesIdxWidth+:CcuCfg.u.NLineWidth]; assign st0_tracker_alloc_id_o = st0_ax.id; assign st0_replay_alloc_o = st0_ax_valid && st0_ax_ready && st0_replay; From c1742beaad1a97b18f640fbf4e3e47f6ad3e5628 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 20 Jun 2025 10:02:52 +0200 Subject: [PATCH 049/109] ccu_snoop_pipe: fix missing signals declaration --- src/ccu/ace_ccu_snoop_pipe.sv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ccu/ace_ccu_snoop_pipe.sv b/src/ccu/ace_ccu_snoop_pipe.sv index fcaf302..4406a6a 100644 --- a/src/ccu/ace_ccu_snoop_pipe.sv +++ b/src/ccu/ace_ccu_snoop_pipe.sv @@ -116,6 +116,8 @@ module ace_ccu_snoop_pipe slv_idx_t st0_slv_idx; st1_t st0_pipe; + logic st1_pipe_valid; + logic st1_pipe_ready; st1_t st1; logic st1_valid; logic st1_ready; From 5c6a90c0c315360f746939230340cd3ebf099975 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 20 Jun 2025 10:05:09 +0200 Subject: [PATCH 050/109] ccu_tracker: add performance events and empty signal --- src/ccu/ace_ccu_top.sv | 9 ++++++--- src/ccu/ace_ccu_tracker.sv | 25 +++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 5 
deletions(-) diff --git a/src/ccu/ace_ccu_top.sv b/src/ccu/ace_ccu_top.sv index b83fc61..0753b94 100644 --- a/src/ccu/ace_ccu_top.sv +++ b/src/ccu/ace_ccu_top.sv @@ -291,8 +291,8 @@ module ace_ccu_top .st1_read_ready_i (read_ready), .st1_cd_ctrl_valid_o (cd_ctrl_valid), .st1_cd_ctrl_ready_i (cd_ctrl_ready), - .evt_st0_stall_o (), - .evt_st1_stall_o () + .evt_st0_stall_o ( /*unused*/), + .evt_st1_stall_o ( /*unused*/) ); stream_fork_dynamic #( @@ -373,6 +373,7 @@ module ace_ccu_top .clk_i, .rst_ni, .full_o (tracker_full), + .empty_o ( /*unused*/), .check_i (tracker_check), .check_hit_o (tracker_check_hit), .alloc_i (tracker_alloc), @@ -390,7 +391,9 @@ module ace_ccu_top .dealloc_b_resp_id_i (tracker_dealloc_b_resp_id), .dealloc_b_resp_wb_o (tracker_dealloc_b_resp_wb), .updt_wb_i (tracker_updt_wb), - .updt_wb_tid_i (tracker_updt_wb_tid) + .updt_wb_tid_i (tracker_updt_wb_tid), + .evt_hit_id_o ( /*unused*/), + .evt_hit_nline_o ( /*unused*/) ); // }}} diff --git a/src/ccu/ace_ccu_tracker.sv b/src/ccu/ace_ccu_tracker.sv index e031080..396938e 100644 --- a/src/ccu/ace_ccu_tracker.sv +++ b/src/ccu/ace_ccu_tracker.sv @@ -24,6 +24,7 @@ module ace_ccu_tracker input logic rst_ni, output logic full_o, + output logic empty_o, // Check/alloc interface // {{{ @@ -49,8 +50,17 @@ module ace_ccu_tracker output logic dealloc_b_resp_wb_o, // }}} + // Writeback update interface + // {{{ input logic updt_wb_i, - input tid_t updt_wb_tid_i + input tid_t updt_wb_tid_i, + // }}} + + // Performance events + // {{{ + output logic evt_hit_id_o, + output logic evt_hit_nline_o + // }}} ); // Typedefs @@ -82,6 +92,8 @@ module ace_ccu_tracker logic [CcuCfg.u.MaxTransactions-1:0] hit_id_bv; logic [CcuCfg.u.MaxTransactions-1:0] hit_nline_bv; + logic hit_id; + logic hit_nline; tid_t rack_queue_wdata; tid_t wack_queue_wdata; @@ -215,7 +227,9 @@ module ace_ccu_tracker assign hit_nline_bv[i] = valid_q[i] && (data_q[i].nline == alloc_nline_i); end - assign check_hit_o = check_i && |{hit_id_bv, 
hit_nline_bv}; + assign hit_id = |hit_id_bv; + assign hit_nline = |hit_nline_bv; + assign check_hit_o = check_i && (hit_id || hit_nline); // }}} // Writeback logic @@ -231,6 +245,13 @@ module ace_ccu_tracker // Global control // {{{ assign full_o = (valid_q == '1); + assign empty_o = (valid_q == '0); + // }}} + + // Performance events + // {{{ + assign evt_hit_id_o = check_i && hit_id; + assign evt_hit_nline_o = check_i && hit_nline; // }}} endmodule From 707a9ff5c1b67c30b6d71eb09b3d051e72eaf403 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 20 Jun 2025 10:05:40 +0200 Subject: [PATCH 051/109] ccu_top: rename write unit instance --- src/ccu/ace_ccu_top.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu/ace_ccu_top.sv b/src/ccu/ace_ccu_top.sv index 0753b94..9b1d55a 100644 --- a/src/ccu/ace_ccu_top.sv +++ b/src/ccu/ace_ccu_top.sv @@ -421,7 +421,7 @@ module ace_ccu_top .ccu_aw_t(ccu_axi_aw_t), .w_t (w_t), .ccu_b_t (ccu_axi_b_t) - ) u_ace_ccu_write ( + ) u_ace_ccu_write_unit ( .clk_i, .rst_ni, .valid_i (write_valid), From 36b9ecc4c026612717598512f96a6450fcb69d25 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 20 Jun 2025 10:39:08 +0200 Subject: [PATCH 052/109] ace_pkg: use more vendorized nomenclature for ACE util functions --- src/ace_pkg.sv | 173 ++++++++++++++++++---------------- src/ccu/ace_ccu_ax_arbiter.sv | 10 +- src/ccu/ace_ccu_frontend.sv | 4 +- 3 files changed, 101 insertions(+), 86 deletions(-) diff --git a/src/ace_pkg.sv b/src/ace_pkg.sv index 5d8a94a..6db7217 100644 --- a/src/ace_pkg.sv +++ b/src/ace_pkg.sv @@ -112,8 +112,8 @@ package ace_pkg; /////////////// // AWSNOOP decoding - function automatic logic is_write_no_snoop(logic awbar0, axdomain_t awdomain, - awsnoop_t awsnoop); + function automatic logic ace_is_write_no_snoop(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); return ( awbar0 == 1'b0 && awdomain inside {NonShareable, System} && @@ -121,7 +121,8 @@ package ace_pkg; ); endfunction - 
function automatic logic is_write_unique(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); + function automatic logic ace_is_write_unique(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); return ( awbar0 == 1'b0 && awdomain inside {InnerShareable, OuterShareable} && @@ -129,8 +130,8 @@ package ace_pkg; ); endfunction - function automatic logic is_write_line_unique(logic awbar0, axdomain_t awdomain, - awsnoop_t awsnoop); + function automatic logic ace_is_write_line_unique(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); return ( awbar0 == 1'b0 && awdomain inside {InnerShareable, OuterShareable} && @@ -138,7 +139,8 @@ package ace_pkg; ); endfunction - function automatic logic is_write_clean(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); + function automatic logic ace_is_write_clean(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); return ( awbar0 == 1'b0 && awdomain inside {NonShareable, InnerShareable, OuterShareable} && @@ -146,7 +148,8 @@ package ace_pkg; ); endfunction - function automatic logic is_write_back(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); + function automatic logic ace_is_write_back(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); return ( awbar0 == 1'b0 && awdomain inside {NonShareable, InnerShareable, OuterShareable} && @@ -154,7 +157,7 @@ package ace_pkg; ); endfunction - function automatic logic is_evict(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); + function automatic logic ace_is_evict(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); return ( awbar0 == 1'b0 && awdomain inside {InnerShareable, OuterShareable} && @@ -162,7 +165,8 @@ package ace_pkg; ); endfunction - function automatic logic is_write_evict(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); + function automatic logic ace_is_write_evict(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); return ( awbar0 == 1'b0 && awdomain inside {NonShareable, InnerShareable, OuterShareable} && @@ -171,15 +175,17 @@ 
package ace_pkg; endfunction // ARSNOOP decoding - function automatic logic is_read_no_snoop(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + function automatic logic ace_is_read_no_snoop(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); return (arbar0 == 1'b0 && ardomain inside {NonShareable, System} && arsnoop == ReadNoSnoop); endfunction - function automatic logic is_read_once(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + function automatic logic ace_is_read_once(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); return (arbar0 == 1'b0 && ardomain inside {InnerShareable, OuterShareable} && arsnoop == ReadOnce); endfunction - function automatic logic is_read_shared(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + function automatic logic ace_is_read_shared(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); return ( arbar0 == 1'b0 && ardomain inside {InnerShareable, OuterShareable} && @@ -187,7 +193,8 @@ package ace_pkg; ); endfunction - function automatic logic is_read_clean(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + function automatic logic ace_is_read_clean(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); return ( arbar0 == 1'b0 && ardomain inside {InnerShareable, OuterShareable} && @@ -195,8 +202,8 @@ package ace_pkg; ); endfunction - function automatic logic is_read_not_shared_dirty(logic arbar0, axdomain_t ardomain, - arsnoop_t arsnoop); + function automatic logic ace_is_read_not_shared_dirty(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); return ( arbar0 == 1'b0 && ardomain inside {InnerShareable, OuterShareable} && @@ -204,7 +211,8 @@ package ace_pkg; ); endfunction - function automatic logic is_read_unique(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + function automatic logic ace_is_read_unique(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); return ( arbar0 == 1'b0 && ardomain inside {InnerShareable, OuterShareable} && @@ -212,7 +220,8 @@ package ace_pkg; ); 
endfunction - function automatic logic is_clean_unique(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + function automatic logic ace_is_clean_unique(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); return ( arbar0 == 1'b0 && ardomain inside {InnerShareable, OuterShareable} && @@ -220,7 +229,8 @@ package ace_pkg; ); endfunction - function automatic logic is_make_unique(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + function automatic logic ace_is_make_unique(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); return ( arbar0 == 1'b0 && ardomain inside {InnerShareable, OuterShareable} && @@ -228,7 +238,8 @@ package ace_pkg; ); endfunction - function automatic logic is_clean_shared(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + function automatic logic ace_is_clean_shared(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); return ( arbar0 == 1'b0 && ardomain inside {NonShareable, InnerShareable, OuterShareable} && @@ -236,7 +247,8 @@ package ace_pkg; ); endfunction - function automatic logic is_clean_invalid(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + function automatic logic ace_is_clean_invalid(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); return ( arbar0 == 1'b0 && ardomain inside {NonShareable, InnerShareable, OuterShareable} && @@ -244,7 +256,8 @@ package ace_pkg; ); endfunction - function automatic logic is_make_invalid(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + function automatic logic ace_is_make_invalid(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); return ( arbar0 == 1'b0 && ardomain inside {NonShareable, InnerShareable, OuterShareable} && @@ -254,122 +267,124 @@ package ace_pkg; // Transaction groups - function automatic logic aw_is_coherent(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); + function automatic logic ace_aw_is_coherent(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); logic retval; unique case (1'b1) - is_write_unique(awbar0, awdomain, 
awsnoop): retval = 1'b1; - is_write_line_unique(awbar0, awdomain, awsnoop): retval = 1'b1; - default: retval = 1'b0; + ace_is_write_unique(awbar0, awdomain, awsnoop): retval = 1'b1; + ace_is_write_line_unique(awbar0, awdomain, awsnoop): retval = 1'b1; + default: retval = 1'b0; endcase return retval; endfunction - function automatic logic aw_is_memory_update(logic awbar0, axdomain_t awdomain, - awsnoop_t awsnoop); + function automatic logic ace_aw_is_memory_update(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); logic retval; unique case (1'b1) - is_write_clean(awbar0, awdomain, awsnoop): retval = 1'b1; - is_write_back(awbar0, awdomain, awsnoop): retval = 1'b1; - is_evict(awbar0, awdomain, awsnoop): retval = 1'b1; - is_write_evict(awbar0, awdomain, awsnoop): retval = 1'b1; - default: retval = 1'b0; + ace_is_write_clean(awbar0, awdomain, awsnoop): retval = 1'b1; + ace_is_write_back(awbar0, awdomain, awsnoop): retval = 1'b1; + ace_is_evict(awbar0, awdomain, awsnoop): retval = 1'b1; + ace_is_write_evict(awbar0, awdomain, awsnoop): retval = 1'b1; + default: retval = 1'b0; endcase return retval; endfunction - function automatic logic aw_is_non_blocking(logic awbar0, axdomain_t awdomain, - awsnoop_t awsnoop); + function automatic logic ace_aw_is_non_blocking(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); logic retval; unique case (1'b1) - aw_is_memory_update(awbar0, awdomain, awsnoop): retval = 1'b1; - is_write_no_snoop(awbar0, awdomain, awsnoop): retval = 1'b1; - default: retval = 1'b0; + ace_aw_is_memory_update(awbar0, awdomain, awsnoop): retval = 1'b1; + ace_is_write_no_snoop(awbar0, awdomain, awsnoop): retval = 1'b1; + default: retval = 1'b0; endcase return retval; endfunction - function automatic logic ar_is_coherent(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + function automatic logic ace_ar_is_coherent(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); logic retval; unique case (1'b1) - is_read_once(arbar0, ardomain, arsnoop): 
retval = 1'b1; - is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; - is_read_clean(arbar0, ardomain, arsnoop): retval = 1'b1; - is_read_not_shared_dirty(arbar0, ardomain, arsnoop): retval = 1'b1; - is_read_unique(arbar0, ardomain, arsnoop): retval = 1'b1; - is_clean_unique(arbar0, ardomain, arsnoop): retval = 1'b1; - is_make_unique(arbar0, ardomain, arsnoop): retval = 1'b1; - default: retval = 1'b0; + ace_is_read_once(arbar0, ardomain, arsnoop): retval = 1'b1; + ace_is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; + ace_is_read_clean(arbar0, ardomain, arsnoop): retval = 1'b1; + ace_is_read_not_shared_dirty(arbar0, ardomain, arsnoop): retval = 1'b1; + ace_is_read_unique(arbar0, ardomain, arsnoop): retval = 1'b1; + ace_is_clean_unique(arbar0, ardomain, arsnoop): retval = 1'b1; + ace_is_make_unique(arbar0, ardomain, arsnoop): retval = 1'b1; + default: retval = 1'b0; endcase return retval; endfunction - function automatic logic ar_is_cache_maintenance(logic arbar0, axdomain_t ardomain, - arsnoop_t arsnoop); + function automatic logic ace_ar_is_cache_maintenance(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); logic retval; unique case (1'b1) - is_clean_shared(arbar0, ardomain, arsnoop): retval = 1'b1; - is_clean_invalid(arbar0, ardomain, arsnoop): retval = 1'b1; - is_make_invalid(arbar0, ardomain, arsnoop): retval = 1'b1; - default: retval = 1'b0; + ace_is_clean_shared(arbar0, ardomain, arsnoop): retval = 1'b1; + ace_is_clean_invalid(arbar0, ardomain, arsnoop): retval = 1'b1; + ace_is_make_invalid(arbar0, ardomain, arsnoop): retval = 1'b1; + default: retval = 1'b0; endcase return retval; endfunction // Snoop transaction from initiating master transaction - function automatic acsnoop_t ar_acsnoop_map(logic arbar0, axdomain_t ardomain, - arsnoop_t arsnoop, logic arlock); + function automatic acsnoop_t ace_ar_acsnoop_map(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop, logic arlock); acsnoop_t acsnoop; unique case (1'b1) - 
is_clean_unique(arbar0, ardomain, arsnoop): acsnoop = acsnoop_t'(CleanInvalid); - is_make_unique(arbar0, ardomain, arsnoop): acsnoop = acsnoop_t'(MakeInvalid); - default: acsnoop = acsnoop_t'(arsnoop); + ace_is_clean_unique(arbar0, ardomain, arsnoop): acsnoop = acsnoop_t'(CleanInvalid); + ace_is_make_unique(arbar0, ardomain, arsnoop): acsnoop = acsnoop_t'(MakeInvalid); + default: acsnoop = acsnoop_t'(arsnoop); endcase // Hacky way to support AMOs in Culsans with the legacy WB cache - if (arlock && is_read_once(arbar0, ardomain, arsnoop)) acsnoop = acsnoop_t'(CleanInvalid); + if (arlock && ace_is_read_once(arbar0, ardomain, arsnoop)) acsnoop = acsnoop_t'(CleanInvalid); return acsnoop; endfunction - function automatic acsnoop_t aw_acsnoop_map(logic awbar0, axdomain_t awdomain, - arsnoop_t awsnoop); + function automatic acsnoop_t ace_aw_acsnoop_map(logic awbar0, axdomain_t awdomain, + arsnoop_t awsnoop); acsnoop_t acsnoop; unique case (1'b1) - is_write_unique(awbar0, awdomain, awsnoop): acsnoop = acsnoop_t'(CleanInvalid); - is_write_line_unique(awbar0, awdomain, awsnoop): acsnoop = acsnoop_t'(MakeInvalid); - default: acsnoop = acsnoop_t'(CleanInvalid); + ace_is_write_unique(awbar0, awdomain, awsnoop): acsnoop = acsnoop_t'(CleanInvalid); + ace_is_write_line_unique(awbar0, awdomain, awsnoop): acsnoop = acsnoop_t'(MakeInvalid); + default: acsnoop = acsnoop_t'(CleanInvalid); endcase return acsnoop; endfunction - function automatic logic ar_resp_accepts_dirty(logic arbar0, axdomain_t ardomain, - arsnoop_t arsnoop); + function automatic logic ace_ar_accepts_dirty(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); logic retval; unique case (1'b1) - is_read_not_shared_dirty(arbar0, ardomain, arsnoop): retval = 1'b1; - is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; - is_read_unique(arbar0, ardomain, arsnoop): retval = 1'b1; - default: retval = 1'b0; + ace_is_read_not_shared_dirty(arbar0, ardomain, arsnoop): retval = 1'b1; + ace_is_read_shared(arbar0, ardomain, 
arsnoop): retval = 1'b1; + ace_is_read_unique(arbar0, ardomain, arsnoop): retval = 1'b1; + default: retval = 1'b0; endcase return retval; endfunction - function automatic logic ar_resp_accepts_dirty_shared(logic arbar0, axdomain_t ardomain, - arsnoop_t arsnoop); + function automatic logic ace_ar_accepts_dirty_shared(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); logic retval; unique case (1'b1) - is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; - default: retval = 1'b0; + ace_is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; + default: retval = 1'b0; endcase return retval; endfunction - function automatic logic ar_resp_accepts_shared(logic arbar0, axdomain_t ardomain, - arsnoop_t arsnoop); + function automatic logic ace_ar_accepts_shared(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); logic retval; unique case (1'b1) - is_read_not_shared_dirty(arbar0, ardomain, arsnoop): retval = 1'b1; - is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; - is_read_clean(arbar0, ardomain, arsnoop): retval = 1'b1; - default: retval = 1'b0; + ace_is_read_not_shared_dirty(arbar0, ardomain, arsnoop): retval = 1'b1; + ace_is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; + ace_is_read_clean(arbar0, ardomain, arsnoop): retval = 1'b1; + default: retval = 1'b0; endcase return retval; endfunction diff --git a/src/ccu/ace_ccu_ax_arbiter.sv b/src/ccu/ace_ccu_ax_arbiter.sv index 9825e87..a367d2d 100644 --- a/src/ccu/ace_ccu_ax_arbiter.sv +++ b/src/ccu/ace_ccu_ax_arbiter.sv @@ -70,21 +70,21 @@ module ace_ccu_ax_arbiter // {{{ assign ar_muxed = ax_is_replay_o ? 
replay_ar_i : ar_i; // ACSNOOP computed from AWSNOOP - assign aw_acsnoop = aw_acsnoop_map(aw_i.bar[0], aw_i.domain, aw_i.snoop); + assign aw_acsnoop = ace_aw_acsnoop_map(aw_i.bar[0], aw_i.domain, aw_i.snoop); // ACSNOOP computed from ARSNOOP - assign ar_acsnoop = ar_acsnoop_map( + assign ar_acsnoop = ace_ar_acsnoop_map( ar_muxed.bar[0], ar_muxed.domain, ar_muxed.snoop, ar_muxed.lock ); // Read transaction can accept a cacheline in Dirty state - assign ar_accepts_dirty = ar_resp_accepts_dirty( + assign ar_accepts_dirty = ace_ar_accepts_dirty( ar_muxed.bar[0], ar_muxed.domain, ar_muxed.snoop ); // Read transaction can accept a cacheline in Dirty and Shared state - assign ar_accepts_dirty_shared = ar_resp_accepts_dirty_shared( + assign ar_accepts_dirty_shared = ace_ar_accepts_dirty_shared( ar_muxed.bar[0], ar_muxed.domain, ar_muxed.snoop ); // Read transaction can accept a cacheline in Shared state - assign ar_accepts_shared = ar_resp_accepts_shared( + assign ar_accepts_shared = ace_ar_accepts_shared( ar_muxed.bar[0], ar_muxed.domain, ar_muxed.snoop ); // Mux output signals diff --git a/src/ccu/ace_ccu_frontend.sv b/src/ccu/ace_ccu_frontend.sv index f205060..e01bcbe 100644 --- a/src/ccu/ace_ccu_frontend.sv +++ b/src/ccu/ace_ccu_frontend.sv @@ -97,11 +97,11 @@ module ace_ccu_frontend ); // Separate in each port blocking and non-blocking traffic - assign aw_is_nonblocking = aw_is_non_blocking( + assign aw_is_nonblocking = ace_aw_is_non_blocking( slv_req_cut[i].aw.bar[0], slv_req_cut[i].aw.domain, slv_req_cut[i].aw.snoop ); - assign ar_is_read_no_snoop = is_read_no_snoop( + assign ar_is_read_no_snoop = ace_is_read_no_snoop( slv_req_cut[i].ar.bar[0], slv_req_cut[i].ar.domain, slv_req_cut[i].ar.snoop ); From e0c2aa81c47b2f3927e9419b416001960a42610f Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Sat, 21 Jun 2025 11:17:04 +0200 Subject: [PATCH 053/109] ace_pkg: fix formatting based on verible --- src/ace_pkg.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) 
diff --git a/src/ace_pkg.sv b/src/ace_pkg.sv index 6db7217..be21130 100644 --- a/src/ace_pkg.sv +++ b/src/ace_pkg.sv @@ -340,7 +340,8 @@ package ace_pkg; default: acsnoop = acsnoop_t'(arsnoop); endcase // Hacky way to support AMOs in Culsans with the legacy WB cache - if (arlock && ace_is_read_once(arbar0, ardomain, arsnoop)) acsnoop = acsnoop_t'(CleanInvalid); + if (arlock && ace_is_read_once(arbar0, ardomain, arsnoop)) + acsnoop = acsnoop_t'(CleanInvalid); return acsnoop; endfunction From 6f136aacc8a400f4ff7fb363c66e54cf4f708220 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 26 Jun 2025 11:24:49 +0200 Subject: [PATCH 054/109] treewide: use ID bit to mark writebacks Writebacks are marked with unique IDs to differentiate them from mst-issued transactions. To ensure writebacks are ordered before associated AW transactions, a FSM waits for the completion of the writeback. --- src/ccu/ace_ccu_ax_arbiter.sv | 80 ++++++------ src/ccu/ace_ccu_cd_ctrl.sv | 40 +++--- src/ccu/ace_ccu_frontend.sv | 64 +++++----- src/ccu/ace_ccu_pkg.sv | 8 +- src/ccu/ace_ccu_read.sv | 34 +++--- src/ccu/ace_ccu_snoop_pipe.sv | 150 +++++++++++------------ src/ccu/ace_ccu_top.sv | 221 ++++++++++++++++------------------ src/ccu/ace_ccu_tracker.sv | 84 +++++-------- src/ccu/ace_ccu_write.sv | 162 ++++++++++++++----------- 9 files changed, 419 insertions(+), 424 deletions(-) diff --git a/src/ccu/ace_ccu_ax_arbiter.sv b/src/ccu/ace_ccu_ax_arbiter.sv index a367d2d..376155d 100644 --- a/src/ccu/ace_ccu_ax_arbiter.sv +++ b/src/ccu/ace_ccu_ax_arbiter.sv @@ -15,55 +15,55 @@ module ace_ccu_ax_arbiter import ace_pkg::*; import ace_ccu_pkg::*; #( - parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type ccu_aw_t = logic, - parameter type ccu_ar_t = logic, - parameter type ccu_ax_t = logic + parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, + parameter type midend_aw_t = logic, + parameter type midend_ar_t = logic, + parameter type midend_ax_t = logic ) ( input logic clk_i, input logic 
rst_ni, input logic replay_full_i, - input ccu_aw_t aw_i, - input logic aw_valid_i, - output logic aw_ready_o, - input ccu_ar_t ar_i, - input logic ar_valid_i, - output logic ar_ready_o, - input ccu_ar_t replay_ar_i, - input logic replay_ar_valid_i, - output logic replay_ar_ready_o, + input midend_aw_t aw_i, + input logic aw_valid_i, + output logic aw_ready_o, + input midend_ar_t ar_i, + input logic ar_valid_i, + output logic ar_ready_o, + input midend_ar_t replay_ar_i, + input logic replay_ar_valid_i, + output logic replay_ar_ready_o, - output ccu_ax_t ax_o, - output logic ax_valid_o, - input logic ax_ready_i, - output logic ax_is_write_o, - output logic ax_is_replay_o, - output acsnoop_t ax_acsnoop_o, - output logic ar_accepts_dirty_o, - output logic ar_accepts_dirty_shared_o, - output logic ar_accepts_shared_o, - output axdomain_t ax_domain_o + output midend_ax_t ax_o, + output logic ax_valid_o, + input logic ax_ready_i, + output logic ax_is_write_o, + output logic ax_is_replay_o, + output acsnoop_t ax_acsnoop_o, + output logic ar_accepts_dirty_o, + output logic ar_accepts_dirty_shared_o, + output logic ar_accepts_shared_o, + output axdomain_t ax_domain_o ); // Internal signals // {{{ - ccu_ar_t ar_muxed; - ccu_ax_t aw_in; - ccu_ax_t ar_in; - ccu_ax_t ax; - ccu_ax_t replay_ar; - logic ax_valid; - logic ax_ready; - logic ax_arb_valid; - logic ax_arb_ready; - logic ax_is_write; - acsnoop_t aw_acsnoop; - acsnoop_t ar_acsnoop; - logic ar_accepts_dirty; - logic ar_accepts_dirty_shared; - logic ar_accepts_shared; + midend_ar_t ar_muxed; + midend_ax_t aw_in; + midend_ax_t ar_in; + midend_ax_t ax; + midend_ax_t replay_ar; + logic ax_valid; + logic ax_ready; + logic ax_arb_valid; + logic ax_arb_ready; + logic ax_is_write; + acsnoop_t aw_acsnoop; + acsnoop_t ar_acsnoop; + logic ar_accepts_dirty; + logic ar_accepts_dirty_shared; + logic ar_accepts_shared; // }}} // Coherence decoding @@ -122,7 +122,7 @@ module ace_ccu_ax_arbiter rr_arb_tree #( .NumIn (2), - .DataType 
(ccu_ax_t), + .DataType (midend_ax_t), .AxiVldRdy(1'b1), .LockIn (1'b1) ) u_ax_arbiter ( @@ -153,7 +153,7 @@ module ace_ccu_ax_arbiter rr_arb_tree #( .NumIn (2), - .DataType (ccu_ax_t), + .DataType (midend_ax_t), .AxiVldRdy(1'b1), .LockIn (1'b0), .ExtPrio (1'b1) diff --git a/src/ccu/ace_ccu_cd_ctrl.sv b/src/ccu/ace_ccu_cd_ctrl.sv index 7238bcf..9e5d94e 100644 --- a/src/ccu/ace_ccu_cd_ctrl.sv +++ b/src/ccu/ace_ccu_cd_ctrl.sv @@ -15,28 +15,28 @@ module ace_ccu_cd_ctrl import ace_pkg::*; import ace_ccu_pkg::*; #( - parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type ccu_ax_t = logic, - parameter type ccu_id_t = logic, - parameter type user_t = logic, - parameter type cd_t = logic, - parameter type slv_bv_t = logic, - parameter type w_t = logic, - parameter type ccu_r_t = logic + parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, + parameter type midend_ax_t = logic, + parameter type midend_id_t = logic, + parameter type user_t = logic, + parameter type cd_t = logic, + parameter type slv_bv_t = logic, + parameter type w_t = logic, + parameter type midend_r_t = logic ) ( input logic clk_i, input logic rst_ni, // Ctrl - input logic valid_i, - output logic ready_o, - input ccu_ax_t ax_i, - input logic cd_ctrl_write_i, - input logic cd_ctrl_read_i, - input slv_bv_t cd_bv_i, - input logic r_resp_shared_i, - input logic r_resp_dirty_i, + input logic valid_i, + output logic ready_o, + input midend_ax_t ax_i, + input logic cd_ctrl_write_i, + input logic cd_ctrl_read_i, + input slv_bv_t cd_bv_i, + input logic r_resp_shared_i, + input logic r_resp_dirty_i, // CD snoop channel input cd_t [CcuCfg.u.SlvPorts-1:0] cd_i, @@ -49,16 +49,16 @@ module ace_ccu_cd_ctrl input logic w_ready_i, // Slv interface - output ccu_r_t r_o, - output logic r_valid_o, - input logic r_ready_i + output midend_r_t r_o, + output logic r_valid_o, + input logic r_ready_i ); // Typedefs // {{{ typedef logic [CcuCfg.CachelineAxiTransfersIdxWidth-1:0] cl_axi_trans_idx_t; typedef struct packed { - 
ccu_id_t id; + midend_id_t id; logic cd_ctrl_write; logic cd_ctrl_read; cl_axi_trans_idx_t r_cd_start_trans; diff --git a/src/ccu/ace_ccu_frontend.sv b/src/ccu/ace_ccu_frontend.sv index e01bcbe..76f663a 100644 --- a/src/ccu/ace_ccu_frontend.sv +++ b/src/ccu/ace_ccu_frontend.sv @@ -13,22 +13,22 @@ module ace_ccu_frontend import ace_pkg::*; import ace_ccu_pkg::*; #( - parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type slv_bv_t = logic, - parameter type slv_idx_t = logic, - parameter type slv_aw_t = logic, - parameter type w_t = logic, - parameter type slv_b_t = logic, - parameter type slv_ar_t = logic, - parameter type slv_r_t = logic, - parameter type slv_req_t = logic, - parameter type slv_resp_t = logic, - parameter type ccu_aw_t = logic, - parameter type ccu_b_t = logic, - parameter type ccu_ar_t = logic, - parameter type ccu_r_t = logic, - parameter type ccu_req_t = logic, - parameter type ccu_resp_t = logic + parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, + parameter type slv_bv_t = logic, + parameter type slv_idx_t = logic, + parameter type slv_aw_t = logic, + parameter type w_t = logic, + parameter type slv_b_t = logic, + parameter type slv_ar_t = logic, + parameter type slv_r_t = logic, + parameter type slv_req_t = logic, + parameter type slv_resp_t = logic, + parameter type midend_aw_t = logic, + parameter type midend_b_t = logic, + parameter type midend_ar_t = logic, + parameter type midend_r_t = logic, + parameter type midend_req_t = logic, + parameter type midend_resp_t = logic ) ( input logic clk_i, input logic rst_ni, @@ -36,10 +36,10 @@ module ace_ccu_frontend input slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_req_i, output slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_resp_o, - output ccu_req_t ccu_nonshareable_req_o, - input ccu_resp_t ccu_nonshareable_resp_i, - output ccu_req_t ccu_shareable_req_o, - input ccu_resp_t ccu_shareable_resp_i, + output midend_req_t ccu_nonshareable_req_o, + input midend_resp_t ccu_nonshareable_resp_i, + output 
midend_req_t ccu_shareable_req_o, + input midend_resp_t ccu_shareable_resp_i, output slv_bv_t ccu_shareable_rack_o, output slv_bv_t ccu_shareable_wack_o @@ -135,18 +135,18 @@ module ace_ccu_frontend axi_mux #( .SlvAxiIDWidth(CcuCfg.u.AxiSlvIdWidth), .slv_aw_chan_t(slv_aw_t), - .mst_aw_chan_t(ccu_aw_t), + .mst_aw_chan_t(midend_aw_t), .w_chan_t (w_t), .slv_b_chan_t (slv_b_t), - .mst_b_chan_t (ccu_b_t), + .mst_b_chan_t (midend_b_t), .slv_ar_chan_t(slv_ar_t), - .mst_ar_chan_t(ccu_ar_t), + .mst_ar_chan_t(midend_ar_t), .slv_r_chan_t (slv_r_t), - .mst_r_chan_t (ccu_r_t), + .mst_r_chan_t (midend_r_t), .slv_req_t (slv_req_t), .slv_resp_t (slv_resp_t), - .mst_req_t (ccu_req_t), - .mst_resp_t (ccu_resp_t), + .mst_req_t (midend_req_t), + .mst_resp_t (midend_resp_t), .NoSlvPorts (CcuCfg.u.SlvPorts), .MaxWTrans (32'd8), .FallThrough (1'b1), @@ -171,18 +171,18 @@ module ace_ccu_frontend axi_mux #( .SlvAxiIDWidth(CcuCfg.u.AxiSlvIdWidth), .slv_aw_chan_t(slv_aw_t), - .mst_aw_chan_t(ccu_aw_t), + .mst_aw_chan_t(midend_aw_t), .w_chan_t (w_t), .slv_b_chan_t (slv_b_t), - .mst_b_chan_t (ccu_b_t), + .mst_b_chan_t (midend_b_t), .slv_ar_chan_t(slv_ar_t), - .mst_ar_chan_t(ccu_ar_t), + .mst_ar_chan_t(midend_ar_t), .slv_r_chan_t (slv_r_t), - .mst_r_chan_t (ccu_r_t), + .mst_r_chan_t (midend_r_t), .slv_req_t (slv_req_t), .slv_resp_t (slv_resp_t), - .mst_req_t (ccu_req_t), - .mst_resp_t (ccu_resp_t), + .mst_req_t (midend_req_t), + .mst_resp_t (midend_resp_t), .NoSlvPorts (CcuCfg.u.SlvPorts), .MaxWTrans (32'd8), .FallThrough (1'b1), diff --git a/src/ccu/ace_ccu_pkg.sv b/src/ccu/ace_ccu_pkg.sv index 797bfaa..7b6673e 100644 --- a/src/ccu/ace_ccu_pkg.sv +++ b/src/ccu/ace_ccu_pkg.sv @@ -53,7 +53,8 @@ package ace_ccu_pkg; int unsigned CachelineAddrWidth; int unsigned CachelineAxiTransfers; int unsigned CachelineAxiTransfersIdxWidth; - int unsigned AxiCcuIdWidth; + int unsigned AxiMidendIdWidth; + int unsigned AxiBackendIdWidth; int unsigned AxiDataBytes; int unsigned AxiDataBytesIdxWidth; int unsigned 
AxiStrbWidth; @@ -76,8 +77,9 @@ package ace_ccu_pkg; p.AxiDataBytes = u.AxiDataWidth / 8; p.AxiDataBytesIdxWidth = $clog2(p.AxiDataBytes); p.AxiStrbWidth = u.AxiDataWidth / 8; - p.AxiCcuIdWidth = u.AxiSlvIdWidth + p.SlvPortIdxWidth; - p.AxiMstIdWidth = p.AxiCcuIdWidth + 1; + p.AxiMidendIdWidth = u.AxiSlvIdWidth + p.SlvPortIdxWidth; + p.AxiBackendIdWidth = p.AxiMidendIdWidth + 1; + p.AxiMstIdWidth = p.AxiBackendIdWidth + 1; return p; endfunction diff --git a/src/ccu/ace_ccu_read.sv b/src/ccu/ace_ccu_read.sv index 020674c..46aa8f7 100644 --- a/src/ccu/ace_ccu_read.sv +++ b/src/ccu/ace_ccu_read.sv @@ -17,47 +17,47 @@ module ace_ccu_read import ace_ccu_pkg::*; #( parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type ccu_ax_t = logic, + parameter type midend_ax_t = logic, parameter type tid_t = logic, - parameter type ccu_axi_ar_t = logic, - parameter type ccu_axi_r_t = logic, - parameter type ccu_ace_r_t = logic + parameter type backend_ar_t = logic, + parameter type backend_r_t = logic, + parameter type midend_r_t = logic ) ( input logic clk_i, input logic rst_ni, // Ctrl - input logic valid_i, - output logic ready_o, - input ccu_ax_t ax_i, + input logic valid_i, + output logic ready_o, + input midend_ax_t ax_i, // Snp interface - input ccu_ace_r_t cd_r_i, - input logic cd_r_valid_i, - output logic cd_r_ready_o, + input midend_r_t cd_r_i, + input logic cd_r_valid_i, + output logic cd_r_ready_o, // Slv interface - output ccu_ace_r_t r_o, + output midend_r_t r_o, output logic r_valid_o, input logic r_ready_i, // Mst interface - output ccu_axi_ar_t ar_o, + output backend_ar_t ar_o, output logic ar_valid_o, input logic ar_ready_i, - input ccu_axi_r_t r_i, + input backend_r_t r_i, input logic r_valid_i, output logic r_ready_o ); - ccu_axi_ar_t ar_sync_wdata; - ccu_ace_r_t mem_r; + backend_ar_t ar_sync_wdata; + midend_r_t mem_r; // AR channel // {{{ `AXI_ASSIGN_AR_STRUCT(ar_sync_wdata, ax_i) fall_through_register #( - .T(ccu_axi_ar_t) + .T(backend_ar_t) ) 
u_ar_sync_reg ( .clk_i, .rst_ni, @@ -78,7 +78,7 @@ module ace_ccu_read rr_arb_tree #( .NumIn (2), - .DataType (ccu_ace_r_t), + .DataType (midend_r_t), .AxiVldRdy(1'b1), .LockIn (1'b1) ) u_r_arbiter ( diff --git a/src/ccu/ace_ccu_snoop_pipe.sv b/src/ccu/ace_ccu_snoop_pipe.sv index 4406a6a..75905c9 100644 --- a/src/ccu/ace_ccu_snoop_pipe.sv +++ b/src/ccu/ace_ccu_snoop_pipe.sv @@ -15,10 +15,10 @@ module ace_ccu_snoop_pipe #( parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, parameter type domain_rule_t = logic, - parameter type ccu_aw_t = logic, - parameter type ccu_ar_t = logic, - parameter type ccu_ax_t = logic, - parameter type ccu_id_t = logic, + parameter type midend_aw_t = logic, + parameter type midend_ar_t = logic, + parameter type midend_ax_t = logic, + parameter type midend_id_t = logic, parameter type ac_t = logic, parameter type cr_t = logic, parameter type slv_bv_t = logic, @@ -29,17 +29,17 @@ module ace_ccu_snoop_pipe input logic clk_i, input logic rst_ni, - input ccu_aw_t st0_aw_i, - input logic st0_aw_valid_i, - output logic st0_aw_ready_o, + input midend_aw_t st0_aw_i, + input logic st0_aw_valid_i, + output logic st0_aw_ready_o, - input ccu_ar_t st0_ar_i, - input logic st0_ar_valid_i, - output logic st0_ar_ready_o, + input midend_ar_t st0_ar_i, + input logic st0_ar_valid_i, + output logic st0_ar_ready_o, - input ccu_ar_t st0_replay_ar_i, - input logic st0_replay_ar_valid_i, - output logic st0_replay_ar_ready_o, + input midend_ar_t st0_replay_ar_i, + input logic st0_replay_ar_valid_i, + output logic st0_replay_ar_ready_o, output ac_t st0_ac_o, output slv_bv_t st0_ac_bv_o, @@ -56,31 +56,31 @@ module ace_ccu_snoop_pipe input logic st0_replay_hit_i, output logic st0_replay_alloc_o, - input logic st0_tracker_full_i, - output logic st0_tracker_check_o, - input logic st0_tracker_check_hit_i, - output logic st0_tracker_alloc_o, - output logic st0_tracker_alloc_b_o, - output logic st0_tracker_alloc_r_o, - output nline_t st0_tracker_alloc_nline_o, - output 
ccu_id_t st0_tracker_alloc_id_o, - input tid_t st0_tracker_alloc_tid_i, + input logic st0_tracker_full_i, + output logic st0_tracker_check_o, + input logic st0_tracker_check_hit_i, + output logic st0_tracker_alloc_o, + output logic st0_tracker_alloc_b_o, + output logic st0_tracker_alloc_r_o, + output nline_t st0_tracker_alloc_nline_o, + output midend_id_t st0_tracker_alloc_id_o, + input tid_t st0_tracker_alloc_tid_i, input domain_rule_t [CcuCfg.u.SlvPorts-1:0] st0_domain_rule_i, - output ccu_ax_t st1_ax_o, - output logic st1_ax_is_write_o, - output logic st1_r_resp_shared_o, - output logic st1_r_resp_dirty_o, - output tid_t st1_ax_tid_o, - output logic st1_cd_ctrl_write_o, - output logic st1_cd_ctrl_read_o, - output logic st1_write_valid_o, - input logic st1_write_ready_i, - output logic st1_read_valid_o, - input logic st1_read_ready_i, - output logic st1_cd_ctrl_valid_o, - input logic st1_cd_ctrl_ready_i, + output midend_ax_t st1_ax_o, + output logic st1_ax_is_write_o, + output logic st1_r_resp_shared_o, + output logic st1_r_resp_dirty_o, + output tid_t st1_ax_tid_o, + output logic st1_cd_ctrl_write_o, + output logic st1_cd_ctrl_read_o, + output logic st1_write_valid_o, + input logic st1_write_ready_i, + output logic st1_read_valid_o, + input logic st1_read_ready_i, + output logic st1_cd_ctrl_valid_o, + input logic st1_cd_ctrl_ready_i, output logic evt_st0_stall_o, output logic evt_st1_stall_o @@ -88,55 +88,55 @@ module ace_ccu_snoop_pipe // Typedefs // {{{ typedef struct packed { - logic ax_is_write; - logic ar_accepts_dirty; - logic ar_accepts_dirty_shared; - logic ar_accepts_shared; - slv_bv_t cr_bv; - tid_t tid; - ccu_ax_t ax; + logic ax_is_write; + logic ar_accepts_dirty; + logic ar_accepts_dirty_shared; + logic ar_accepts_shared; + slv_bv_t cr_bv; + tid_t tid; + midend_ax_t ax; } st1_t; // }}} // Internal signals // {{{ - ccu_ax_t st0_ax; - logic st0_ax_valid; - logic st0_ax_ready; - logic st0_ax_is_write; - acsnoop_t st0_ax_acsnoop; - axdomain_t 
st0_ax_domain; - logic st0_ar_accepts_dirty; - logic st0_ar_accepts_dirty_shared; - logic st0_ar_accepts_shared; - logic st0_pipe_valid; - logic st0_pipe_ready; - logic st0_hazard; - logic st0_replay; - slv_idx_t st0_slv_idx; - st1_t st0_pipe; - - logic st1_pipe_valid; - logic st1_pipe_ready; - st1_t st1; - logic st1_valid; - logic st1_ready; - logic st1_r_resp_shared; - logic st1_r_resp_dirty; - logic st1_aw_sel; - logic st1_ar_sel; - logic st1_cd_sel; - logic st1_cd_write; - logic st1_cd_read; + midend_ax_t st0_ax; + logic st0_ax_valid; + logic st0_ax_ready; + logic st0_ax_is_write; + acsnoop_t st0_ax_acsnoop; + axdomain_t st0_ax_domain; + logic st0_ar_accepts_dirty; + logic st0_ar_accepts_dirty_shared; + logic st0_ar_accepts_shared; + logic st0_pipe_valid; + logic st0_pipe_ready; + logic st0_hazard; + logic st0_replay; + slv_idx_t st0_slv_idx; + st1_t st0_pipe; + + logic st1_pipe_valid; + logic st1_pipe_ready; + st1_t st1; + logic st1_valid; + logic st1_ready; + logic st1_r_resp_shared; + logic st1_r_resp_dirty; + logic st1_aw_sel; + logic st1_ar_sel; + logic st1_cd_sel; + logic st1_cd_write; + logic st1_cd_read; // }}} // AX arbiter // {{{ ace_ccu_ax_arbiter #( - .CcuCfg (CcuCfg), - .ccu_aw_t(ccu_aw_t), - .ccu_ar_t(ccu_ar_t), - .ccu_ax_t(ccu_ax_t) + .CcuCfg (CcuCfg), + .midend_aw_t(midend_aw_t), + .midend_ar_t(midend_ar_t), + .midend_ax_t(midend_ax_t) ) u_st0_ax_arbiter ( .clk_i, .rst_ni, @@ -215,7 +215,7 @@ module ace_ccu_snoop_pipe assign st0_pipe_valid = st0_ac_valid_o && st0_ac_ready_i; - assign st0_slv_idx = st0_ax.id[CcuCfg.AxiCcuIdWidth-1 : CcuCfg.u.AxiSlvIdWidth]; + assign st0_slv_idx = st0_ax.id[CcuCfg.AxiMidendIdWidth-1 : CcuCfg.u.AxiSlvIdWidth]; always_comb begin : ac_bv_comb unique case (st0_ax_domain) diff --git a/src/ccu/ace_ccu_top.sv b/src/ccu/ace_ccu_top.sv index 9b1d55a..cea7a1d 100644 --- a/src/ccu/ace_ccu_top.sv +++ b/src/ccu/ace_ccu_top.sv @@ -61,7 +61,8 @@ module ace_ccu_top // AXI/ACE types typedef logic [CcuCfg.u.AxiSlvIdWidth-1:0] 
slv_id_t; - typedef logic [CcuCfg.AxiCcuIdWidth-1:0] ccu_id_t; + typedef logic [CcuCfg.AxiMidendIdWidth-1:0] midend_id_t; + typedef logic [CcuCfg.AxiBackendIdWidth-1:0] backend_id_t; typedef logic [CcuCfg.AxiMstIdWidth-1:0] mst_id_t; typedef logic [CcuCfg.u.AxiAddrWidth-1:0] addr_t; typedef logic [CcuCfg.u.AxiDataWidth-1:0] data_t; @@ -69,19 +70,19 @@ module ace_ccu_top typedef logic [CcuCfg.u.AxiUserWidth-1:0] user_t; // Intermediate ACE and AXI channel types - `ACE_TYPEDEF_AW_CHAN_T(ccu_ace_aw_t, addr_t, ccu_id_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(ccu_ace_b_t, ccu_id_t, user_t) - `ACE_TYPEDEF_AR_CHAN_T(ccu_ace_ar_t, addr_t, ccu_id_t, user_t) - `ACE_TYPEDEF_R_CHAN_T(ccu_ace_r_t, data_t, ccu_id_t, user_t) - `ACE_TYPEDEF_REQ_T(ccu_ace_req_t, ccu_ace_aw_t, w_t, ccu_ace_ar_t) - `ACE_TYPEDEF_RESP_T(ccu_ace_resp_t, ccu_ace_b_t, ccu_ace_r_t) - - `AXI_TYPEDEF_AW_CHAN_T(ccu_axi_aw_t, addr_t, ccu_id_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(ccu_axi_b_t, ccu_id_t, user_t) - `AXI_TYPEDEF_AR_CHAN_T(ccu_axi_ar_t, addr_t, ccu_id_t, user_t) - `AXI_TYPEDEF_R_CHAN_T(ccu_axi_r_t, data_t, ccu_id_t, user_t) - `AXI_TYPEDEF_REQ_T(ccu_axi_req_t, ccu_axi_aw_t, w_t, ccu_axi_ar_t) - `AXI_TYPEDEF_RESP_T(ccu_axi_resp_t, ccu_axi_b_t, ccu_axi_r_t) + `ACE_TYPEDEF_AW_CHAN_T(midend_aw_t, addr_t, midend_id_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(midend_b_t, midend_id_t, user_t) + `ACE_TYPEDEF_AR_CHAN_T(midend_ar_t, addr_t, midend_id_t, user_t) + `ACE_TYPEDEF_R_CHAN_T(midend_r_t, data_t, midend_id_t, user_t) + `ACE_TYPEDEF_REQ_T(midend_req_t, midend_aw_t, w_t, midend_ar_t) + `ACE_TYPEDEF_RESP_T(midend_resp_t, midend_b_t, midend_r_t) + + `AXI_TYPEDEF_AW_CHAN_T(backend_aw_t, addr_t, backend_id_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(backend_b_t, backend_id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(backend_ar_t, addr_t, backend_id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(backend_r_t, data_t, backend_id_t, user_t) + `AXI_TYPEDEF_REQ_T(backend_req_t, backend_aw_t, w_t, backend_ar_t) + `AXI_TYPEDEF_RESP_T(backend_resp_t, backend_b_t, 
backend_r_t) // Transaction ID type typedef logic [CcuCfg.TransactionIdxWidth-1:0] tid_t; @@ -91,7 +92,7 @@ module ace_ccu_top // Internal AW/AR request unified representation typedef struct packed { - ccu_id_t id; + midend_id_t id; addr_t addr; axi_pkg::len_t len; axi_pkg::size_t size; @@ -103,19 +104,19 @@ module ace_ccu_top axi_pkg::region_t region; axi_pkg::atop_t atop; user_t user; - } ccu_ax_t; + } midend_ax_t; // }}} // Internal signals // {{{ - ccu_ace_req_t ccu_nonshareable_req; - ccu_ace_resp_t ccu_nonshareable_resp; - ccu_ace_req_t ccu_shareable_req; - ccu_ace_resp_t ccu_shareable_resp; + midend_req_t ccu_nonshareable_req; + midend_resp_t ccu_nonshareable_resp; + midend_req_t ccu_shareable_req; + midend_resp_t ccu_shareable_resp; slv_bv_t ccu_shareable_rack; slv_bv_t ccu_shareable_wack; - ccu_ace_ar_t replay_ar; + midend_ar_t replay_ar; logic replay_ar_valid; logic replay_ar_ready; @@ -149,18 +150,15 @@ module ace_ccu_top logic tracker_alloc_b; logic tracker_alloc_r; nline_t tracker_alloc_nline; - ccu_id_t tracker_alloc_id; + midend_id_t tracker_alloc_id; tid_t tracker_alloc_tid; logic tracker_dealloc_r_resp; logic tracker_dealloc_b_resp; logic tracker_dealloc_check_b_resp; - ccu_id_t tracker_dealloc_r_resp_id; - ccu_id_t tracker_dealloc_b_resp_id; - logic tracker_dealloc_b_resp_wb; - logic tracker_updt_wb; - tid_t tracker_updt_wb_tid; + midend_id_t tracker_dealloc_r_resp_id; + midend_id_t tracker_dealloc_b_resp_id; - ccu_ax_t pipe_ax; + midend_ax_t pipe_ax; logic pipe_ax_is_write; logic pipe_r_resp_shared; logic pipe_r_resp_dirty; @@ -182,14 +180,14 @@ module ace_ccu_top w_t cd_w; logic cd_w_valid; logic cd_w_ready; - ccu_ace_r_t cd_r; + midend_r_t cd_r; logic cd_r_valid; logic cd_r_ready; - ccu_axi_req_t axi_shareable_req; - ccu_axi_resp_t axi_shareable_resp; - ccu_axi_req_t axi_nonshareable_req; - ccu_axi_resp_t axi_nonshareable_resp; + backend_req_t axi_shareable_req; + backend_resp_t axi_shareable_resp; + backend_req_t axi_nonshareable_req; + 
backend_resp_t axi_nonshareable_resp; mst_req_t mst_req; mst_resp_t mst_resp; @@ -199,22 +197,22 @@ module ace_ccu_top // Frontend // {{{ ace_ccu_frontend #( - .CcuCfg (CcuCfg), - .slv_bv_t (slv_bv_t), - .slv_idx_t (slv_idx_t), - .slv_aw_t (slv_aw_t), - .w_t (w_t), - .slv_b_t (slv_b_t), - .slv_ar_t (slv_ar_t), - .slv_r_t (slv_r_t), - .slv_req_t (slv_req_t), - .slv_resp_t(slv_resp_t), - .ccu_aw_t (ccu_ace_aw_t), - .ccu_b_t (ccu_ace_b_t), - .ccu_ar_t (ccu_ace_ar_t), - .ccu_r_t (ccu_ace_r_t), - .ccu_req_t (ccu_ace_req_t), - .ccu_resp_t(ccu_ace_resp_t) + .CcuCfg (CcuCfg), + .slv_bv_t (slv_bv_t), + .slv_idx_t (slv_idx_t), + .slv_aw_t (slv_aw_t), + .w_t (w_t), + .slv_b_t (slv_b_t), + .slv_ar_t (slv_ar_t), + .slv_r_t (slv_r_t), + .slv_req_t (slv_req_t), + .slv_resp_t (slv_resp_t), + .midend_aw_t (midend_aw_t), + .midend_b_t (midend_b_t), + .midend_ar_t (midend_ar_t), + .midend_r_t (midend_r_t), + .midend_req_t (midend_req_t), + .midend_resp_t(midend_resp_t) ) u_ace_ccu_frontend ( .clk_i, .rst_ni, @@ -234,10 +232,10 @@ module ace_ccu_top ace_ccu_snoop_pipe #( .CcuCfg (CcuCfg), .domain_rule_t(domain_rule_t), - .ccu_ax_t (ccu_ax_t), - .ccu_aw_t (ccu_ace_aw_t), - .ccu_ar_t (ccu_ace_ar_t), - .ccu_id_t (ccu_id_t), + .midend_ax_t (midend_ax_t), + .midend_aw_t (midend_aw_t), + .midend_ar_t (midend_ar_t), + .midend_id_t (midend_id_t), .ac_t (snoop_ac_t), .cr_t (snoop_cr_t), .slv_bv_t (slv_bv_t), @@ -363,12 +361,12 @@ module ace_ccu_top assign tracker_dealloc_b_resp_id = ccu_shareable_resp.b.id; ace_ccu_tracker #( - .CcuCfg (CcuCfg), - .slv_bv_t (slv_bv_t), - .slv_idx_t(slv_idx_t), - .nline_t (nline_t), - .ccu_id_t (ccu_id_t), - .tid_t (tid_t) + .CcuCfg (CcuCfg), + .slv_bv_t (slv_bv_t), + .slv_idx_t (slv_idx_t), + .nline_t (nline_t), + .midend_id_t(midend_id_t), + .tid_t (tid_t) ) u_ace_ccu_tracker ( .clk_i, .rst_ni, @@ -389,9 +387,6 @@ module ace_ccu_top .dealloc_b_resp_i (tracker_dealloc_b_resp), .dealloc_check_b_resp_i(tracker_dealloc_check_b_resp), .dealloc_b_resp_id_i 
(tracker_dealloc_b_resp_id), - .dealloc_b_resp_wb_o (tracker_dealloc_b_resp_wb), - .updt_wb_i (tracker_updt_wb), - .updt_wb_tid_i (tracker_updt_wb_tid), .evt_hit_id_o ( /*unused*/), .evt_hit_nline_o ( /*unused*/) ); @@ -415,42 +410,40 @@ module ace_ccu_top // Write Unit // {{{ ace_ccu_write #( - .CcuCfg (CcuCfg), - .ccu_ax_t(ccu_ax_t), - .tid_t (tid_t), - .ccu_aw_t(ccu_axi_aw_t), - .w_t (w_t), - .ccu_b_t (ccu_axi_b_t) + .CcuCfg (CcuCfg), + .midend_ax_t (midend_ax_t), + .tid_t (tid_t), + .backend_aw_t(backend_aw_t), + .w_t (w_t), + .midend_b_t (midend_b_t), + .backend_b_t (backend_b_t) ) u_ace_ccu_write_unit ( .clk_i, .rst_ni, - .valid_i (write_valid), - .ready_o (write_ready), - .ax_i (pipe_ax), - .ax_is_write_i (pipe_ax_is_write), - .ax_is_writeback_i (pipe_cd_ctrl_write), - .ax_tid_i (pipe_ax_tid), - .tracker_updt_wb_o (tracker_updt_wb), - .tracker_updt_wb_tid_o(tracker_updt_wb_tid), - .b_is_writeback_i (tracker_dealloc_b_resp_wb), - .w_i (write_w), - .w_valid_i (write_w_valid), - .w_ready_o (write_w_ready), - .cd_w_i (cd_w), - .cd_w_valid_i (cd_w_valid), - .cd_w_ready_o (cd_w_ready), - .b_o (ccu_shareable_resp.b), - .b_valid_o (ccu_shareable_resp.b_valid), - .b_ready_i (ccu_shareable_req.b_ready), - .aw_o (axi_shareable_req.aw), - .aw_valid_o (axi_shareable_req.aw_valid), - .aw_ready_i (axi_shareable_resp.aw_ready), - .w_o (axi_shareable_req.w), - .w_valid_o (axi_shareable_req.w_valid), - .w_ready_i (axi_shareable_resp.w_ready), - .b_i (axi_shareable_resp.b), - .b_valid_i (axi_shareable_resp.b_valid), - .b_ready_o (axi_shareable_req.b_ready) + .valid_i (write_valid), + .ready_o (write_ready), + .ax_i (pipe_ax), + .ax_is_write_i (pipe_ax_is_write), + .ax_is_writeback_i(pipe_cd_ctrl_write), + .ax_tid_i (pipe_ax_tid), + .w_i (write_w), + .w_valid_i (write_w_valid), + .w_ready_o (write_w_ready), + .cd_w_i (cd_w), + .cd_w_valid_i (cd_w_valid), + .cd_w_ready_o (cd_w_ready), + .b_o (ccu_shareable_resp.b), + .b_valid_o (ccu_shareable_resp.b_valid), + .b_ready_i 
(ccu_shareable_req.b_ready), + .aw_o (axi_shareable_req.aw), + .aw_valid_o (axi_shareable_req.aw_valid), + .aw_ready_i (axi_shareable_resp.aw_ready), + .w_o (axi_shareable_req.w), + .w_valid_o (axi_shareable_req.w_valid), + .w_ready_i (axi_shareable_resp.w_ready), + .b_i (axi_shareable_resp.b), + .b_valid_i (axi_shareable_resp.b_valid), + .b_ready_o (axi_shareable_req.b_ready) ); // }}} @@ -458,11 +451,11 @@ module ace_ccu_top // {{{ ace_ccu_read #( .CcuCfg (CcuCfg), - .ccu_ax_t (ccu_ax_t), + .midend_ax_t (midend_ax_t), .tid_t (tid_t), - .ccu_axi_ar_t(ccu_axi_ar_t), - .ccu_axi_r_t (ccu_axi_r_t), - .ccu_ace_r_t (ccu_ace_r_t) + .backend_ar_t(backend_ar_t), + .backend_r_t (backend_r_t), + .midend_r_t (midend_r_t) ) u_ace_ccu_read_unit ( .clk_i, .rst_ni, @@ -488,14 +481,14 @@ module ace_ccu_top // CD Ctrl Unit // {{{ ace_ccu_cd_ctrl #( - .CcuCfg (CcuCfg), - .ccu_ax_t(ccu_ax_t), - .ccu_id_t(ccu_id_t), - .user_t (user_t), - .cd_t (snoop_cd_t), - .slv_bv_t(slv_bv_t), - .w_t (w_t), - .ccu_r_t (ccu_ace_r_t) + .CcuCfg (CcuCfg), + .midend_ax_t(midend_ax_t), + .midend_id_t(midend_id_t), + .user_t (user_t), + .cd_t (snoop_cd_t), + .slv_bv_t (slv_bv_t), + .w_t (w_t), + .midend_r_t (midend_r_t) ) u_ace_ccu_cd_ctrl ( .clk_i, .rst_ni, @@ -525,18 +518,18 @@ module ace_ccu_top `AXI_TO_ACE_ASSIGN_RESP(ccu_nonshareable_resp, axi_nonshareable_resp) axi_mux #( - .SlvAxiIDWidth(CcuCfg.AxiCcuIdWidth), - .slv_aw_chan_t(ccu_axi_aw_t), + .SlvAxiIDWidth(CcuCfg.AxiBackendIdWidth), + .slv_aw_chan_t(backend_aw_t), .mst_aw_chan_t(mst_aw_t), .w_chan_t (w_t), - .slv_b_chan_t (ccu_axi_b_t), + .slv_b_chan_t (backend_b_t), .mst_b_chan_t (mst_b_t), - .slv_ar_chan_t(ccu_axi_ar_t), + .slv_ar_chan_t(backend_ar_t), .mst_ar_chan_t(mst_ar_t), - .slv_r_chan_t (ccu_axi_r_t), + .slv_r_chan_t (backend_r_t), .mst_r_chan_t (mst_r_t), - .slv_req_t (ccu_axi_req_t), - .slv_resp_t (ccu_axi_resp_t), + .slv_req_t (backend_req_t), + .slv_resp_t (backend_resp_t), .mst_req_t (mst_req_t), .mst_resp_t (mst_resp_t), 
.NoSlvPorts (2), diff --git a/src/ccu/ace_ccu_tracker.sv b/src/ccu/ace_ccu_tracker.sv index 396938e..d5db5af 100644 --- a/src/ccu/ace_ccu_tracker.sv +++ b/src/ccu/ace_ccu_tracker.sv @@ -13,12 +13,12 @@ module ace_ccu_tracker import ace_pkg::*; import ace_ccu_pkg::*; #( - parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type slv_bv_t = logic, - parameter type slv_idx_t = logic, - parameter type nline_t = logic, - parameter type ccu_id_t = logic, - parameter type tid_t = logic + parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, + parameter type slv_bv_t = logic, + parameter type slv_idx_t = logic, + parameter type nline_t = logic, + parameter type midend_id_t = logic, + parameter type tid_t = logic ) ( input logic clk_i, input logic rst_ni, @@ -28,32 +28,25 @@ module ace_ccu_tracker // Check/alloc interface // {{{ - input logic check_i, - output logic check_hit_o, - input logic alloc_i, - input logic alloc_b_i, - input logic alloc_r_i, - input nline_t alloc_nline_i, - input ccu_id_t alloc_id_i, - output tid_t alloc_tid_o, + input logic check_i, + output logic check_hit_o, + input logic alloc_i, + input logic alloc_b_i, + input logic alloc_r_i, + input nline_t alloc_nline_i, + input midend_id_t alloc_id_i, + output tid_t alloc_tid_o, // }}} // Lookup/dealloc interface // {{{ - input slv_bv_t dealloc_rack_i, - input slv_bv_t dealloc_wack_i, - input logic dealloc_r_resp_i, - input ccu_id_t dealloc_r_resp_id_i, - input logic dealloc_b_resp_i, - input logic dealloc_check_b_resp_i, - input ccu_id_t dealloc_b_resp_id_i, - output logic dealloc_b_resp_wb_o, - // }}} - - // Writeback update interface - // {{{ - input logic updt_wb_i, - input tid_t updt_wb_tid_i, + input slv_bv_t dealloc_rack_i, + input slv_bv_t dealloc_wack_i, + input logic dealloc_r_resp_i, + input midend_id_t dealloc_r_resp_id_i, + input logic dealloc_b_resp_i, + input logic dealloc_check_b_resp_i, + input midend_id_t dealloc_b_resp_id_i, // }}} // Performance events @@ -68,12 +61,11 @@ module 
ace_ccu_tracker typedef struct packed { logic r; logic b; - logic wb; } meta_t; typedef struct packed { - nline_t nline; - ccu_id_t id; + nline_t nline; + midend_id_t id; } data_t; // }}} @@ -128,7 +120,7 @@ module ace_ccu_tracker // TODO: can this be simplified? for (genvar i = 0; i < CcuCfg.u.MaxTransactions; i++) begin : gen_dealloc slv_idx_t dealloc_slv_id; - assign dealloc_slv_id = data_q[i].id[CcuCfg.AxiCcuIdWidth-1 : CcuCfg.u.AxiSlvIdWidth]; + assign dealloc_slv_id = data_q[i].id[CcuCfg.AxiMidendIdWidth-1 : CcuCfg.u.AxiSlvIdWidth]; assign meta_clr[i].r = dealloc_rack_i[dealloc_slv_id] && (i == rack_queue_rdata[dealloc_slv_id]); assign meta_clr[i].b = dealloc_wack_i[dealloc_slv_id] && (i == wack_queue_rdata[dealloc_slv_id]); assign valid_clr[i] = ~|meta_d[i]; @@ -153,8 +145,8 @@ module ace_ccu_tracker // Push an entry ID to the wack/rack queues if the dealloc response matches the ID of the transaction // that is being deallocated - assign wack_queue_push = dealloc_b_resp_i && data_q[wack_queue_wdata].id[CcuCfg.AxiCcuIdWidth-1 : CcuCfg.u.AxiSlvIdWidth] == CcuCfg.SlvPortIdxWidth'(i); - assign rack_queue_push = dealloc_r_resp_i && data_q[rack_queue_wdata].id[CcuCfg.AxiCcuIdWidth-1 : CcuCfg.u.AxiSlvIdWidth] == CcuCfg.SlvPortIdxWidth'(i); + assign wack_queue_push = dealloc_b_resp_i && data_q[wack_queue_wdata].id[CcuCfg.AxiMidendIdWidth-1 : CcuCfg.u.AxiSlvIdWidth] == CcuCfg.SlvPortIdxWidth'(i); + assign rack_queue_push = dealloc_r_resp_i && data_q[rack_queue_wdata].id[CcuCfg.AxiMidendIdWidth-1 : CcuCfg.u.AxiSlvIdWidth] == CcuCfg.SlvPortIdxWidth'(i); fifo_v3 #( .FALL_THROUGH(1'b0), @@ -227,31 +219,21 @@ module ace_ccu_tracker assign hit_nline_bv[i] = valid_q[i] && (data_q[i].nline == alloc_nline_i); end - assign hit_id = |hit_id_bv; - assign hit_nline = |hit_nline_bv; - assign check_hit_o = check_i && (hit_id || hit_nline); - // }}} - - // Writeback logic - // {{{ - for (genvar i = 0; i < CcuCfg.u.MaxTransactions; i++) begin : gen_writeback - assign meta_set[i].wb = 
updt_wb_i && updt_wb_tid_i == CcuCfg.TransactionIdxWidth'(i); - assign meta_clr[i].wb = dealloc_check_b_resp_i && wack_queue_wdata == CcuCfg.TransactionIdxWidth'(i); - end - - assign dealloc_b_resp_wb_o = dealloc_check_b_resp_i && meta_q[wack_queue_wdata].wb; + assign hit_id = |hit_id_bv; + assign hit_nline = |hit_nline_bv; + assign check_hit_o = check_i && (hit_id || hit_nline); // }}} // Global control // {{{ - assign full_o = (valid_q == '1); - assign empty_o = (valid_q == '0); + assign full_o = (valid_q == '1); + assign empty_o = (valid_q == '0); // }}} // Performance events // {{{ - assign evt_hit_id_o = check_i && hit_id; - assign evt_hit_nline_o = check_i && hit_nline; + assign evt_hit_id_o = check_i && hit_id; + assign evt_hit_nline_o = check_i && hit_nline; // }}} endmodule diff --git a/src/ccu/ace_ccu_write.sv b/src/ccu/ace_ccu_write.sv index 99a674d..bffc709 100644 --- a/src/ccu/ace_ccu_write.sv +++ b/src/ccu/ace_ccu_write.sv @@ -15,57 +15,59 @@ module ace_ccu_write import ace_pkg::*; import ace_ccu_pkg::*; #( - parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type ccu_ax_t = logic, - parameter type tid_t = logic, - parameter type ccu_aw_t = logic, - parameter type w_t = logic, - parameter type ccu_b_t = logic + parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, + parameter type midend_ax_t = logic, + parameter type tid_t = logic, + parameter type backend_aw_t = logic, + parameter type w_t = logic, + parameter type midend_b_t = logic, + parameter type backend_b_t = logic ) ( input logic clk_i, input logic rst_ni, // Ctrl - input logic valid_i, - output logic ready_o, - input ccu_ax_t ax_i, - input logic ax_is_write_i, - input logic ax_is_writeback_i, - input tid_t ax_tid_i, - - output logic tracker_updt_wb_o, - output tid_t tracker_updt_wb_tid_o, - input logic b_is_writeback_i, - + input logic valid_i, + output logic ready_o, + input midend_ax_t ax_i, + input logic ax_is_write_i, + input logic ax_is_writeback_i, + input tid_t ax_tid_i, // Slv 
interface - input w_t w_i, - input logic w_valid_i, - output logic w_ready_o, - input w_t cd_w_i, - input logic cd_w_valid_i, - output logic cd_w_ready_o, - output ccu_b_t b_o, - output logic b_valid_o, - input logic b_ready_i, + input w_t w_i, + input logic w_valid_i, + output logic w_ready_o, + input w_t cd_w_i, + input logic cd_w_valid_i, + output logic cd_w_ready_o, + output midend_b_t b_o, + output logic b_valid_o, + input logic b_ready_i, // Mst interface - output ccu_aw_t aw_o, - output logic aw_valid_o, - input logic aw_ready_i, - output w_t w_o, - output logic w_valid_o, - input logic w_ready_i, - input ccu_b_t b_i, - input logic b_valid_i, - output logic b_ready_o + output backend_aw_t aw_o, + output logic aw_valid_o, + input logic aw_ready_i, + output w_t w_o, + output logic w_valid_o, + input logic w_ready_i, + input backend_b_t b_i, + input logic b_valid_i, + output logic b_ready_o ); // Typedefs // {{{ typedef struct packed { - ccu_ax_t ax; - logic ax_is_write; - logic ax_is_writeback; - tid_t ax_tid; + midend_ax_t ax; + logic ax_is_write; + logic ax_is_writeback; + tid_t ax_tid; } aw_sync_reg_t; + + typedef enum { + AW_FSM_IDLE, + AW_FSM_WAIT_B_RESP, + AW_FSM_PASSTHROUGH + } aw_fsm_e; // }}} // Internal signals @@ -74,10 +76,11 @@ module ace_ccu_write aw_sync_reg_t aw_sync_rdata; logic aw_sync_valid; logic aw_sync_ready; - logic aw_sync_gate; + logic aw_fsm_valid; + logic aw_fsm_ready; logic aw_is_writeback; - logic aw_writeback_done_d; - logic aw_writeback_done_q; + aw_fsm_e aw_fsm_d; + aw_fsm_e aw_fsm_q; logic w_ctrl_fifo_valid_in; logic w_ctrl_fifo_ready_in; @@ -86,6 +89,7 @@ module ace_ccu_write logic w_mux_valid_out; logic w_mux_ready_out; logic w_is_write_back; + logic b_is_write_back; // }}} // AW channel @@ -110,46 +114,56 @@ module ace_ccu_write .ready_o (ready_o), .data_i (aw_sync_wdata), .valid_o (aw_sync_valid), - .ready_i (aw_sync_ready && !aw_sync_gate), + .ready_i (aw_sync_ready), .data_o (aw_sync_rdata) ); - assign 
tracker_updt_wb_tid_o = aw_sync_rdata.ax_tid; - always_comb begin : aw_writeback_fsm_comb - aw_writeback_done_d = aw_writeback_done_q; - aw_is_writeback = 1'b0; - aw_sync_gate = 1'b0; - - tracker_updt_wb_o = 1'b0; - - if (!aw_writeback_done_q) begin - if (aw_sync_rdata.ax_is_writeback) begin - // A writeback is pending - aw_is_writeback = 1'b1; - if (aw_sync_valid && aw_sync_ready) begin - // The writeback request is done - tracker_updt_wb_o = 1'b1; - if (aw_sync_rdata.ax_is_write) begin - // A write is also pending - aw_writeback_done_d = 1'b1; - aw_sync_gate = 1'b1; + aw_fsm_d = aw_fsm_q; + + aw_is_writeback = 1'b0; + aw_fsm_valid = aw_sync_valid; + aw_sync_ready = aw_fsm_ready; + + case (aw_fsm_q) + AW_FSM_IDLE: begin + if (aw_sync_rdata.ax_is_writeback) begin + // A writeback is pending + aw_is_writeback = 1'b1; + if (aw_fsm_valid && aw_fsm_ready) begin + // The writeback request is done + if (aw_sync_rdata.ax_is_write) begin + // A write is also pending + aw_fsm_d = AW_FSM_WAIT_B_RESP; + aw_sync_ready = 1'b0; + end end end end - end else begin - // Send the pending write after the writeback - if (aw_sync_valid && aw_sync_ready) begin - aw_writeback_done_d = 1'b0; + AW_FSM_WAIT_B_RESP: begin + aw_fsm_valid = 1'b0; + aw_sync_ready = 1'b0; + + if (b_valid_i && b_ready_o && {1'b1, aw_sync_rdata.ax.id} == b_i.id) begin + // The writeback response is received + // The pending write can be sent + aw_fsm_d = AW_FSM_PASSTHROUGH; + end end - end + AW_FSM_PASSTHROUGH: begin + // Let the handshake complete + if (aw_fsm_valid && aw_fsm_ready) begin + aw_fsm_d = AW_FSM_IDLE; + end + end + endcase end always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin - aw_writeback_done_q <= 1'b0; + aw_fsm_q <= AW_FSM_IDLE; end else begin - aw_writeback_done_q <= aw_writeback_done_d; + aw_fsm_q <= aw_fsm_d; end end @@ -159,6 +173,8 @@ module ace_ccu_write `AXI_SET_AW_STRUCT(aw_o, aw_sync_rdata.ax) if (aw_is_writeback) begin + // Use the MSB ID bit to indicate a writeback 
+ aw_o.id[CcuCfg.AxiBackendIdWidth-1] = 1'b1; // Pass a full cacheline aw_o.addr = axi_pkg::aligned_addr(aw_sync_rdata.ax.addr, CcuCfg.CachelineBytesIdxWidth); aw_o.len = CcuCfg.CachelineAxiTransfers - 1; @@ -176,8 +192,8 @@ module ace_ccu_write ) u_aw_fork ( .clk_i, .rst_ni, - .valid_i(aw_sync_valid), - .ready_o(aw_sync_ready), + .valid_i(aw_fsm_valid), + .ready_o(aw_fsm_ready), .valid_o({aw_valid_o, w_ctrl_fifo_valid_in}), .ready_i({aw_ready_i, w_ctrl_fifo_ready_in}) ); @@ -232,11 +248,13 @@ module ace_ccu_write stream_filter u_b_filter ( .valid_i(b_valid_i), .ready_o(b_ready_o), - .drop_i (b_is_writeback_i), + .drop_i (b_is_write_back), .valid_o(b_valid_o), .ready_i(b_ready_i) ); + assign b_is_write_back = b_i.id[CcuCfg.AxiBackendIdWidth-1]; + `AXI_ASSIGN_B_STRUCT(b_o, b_i) // }}} From 4c03a2c5cbd86838a2ea681aacf3a7a87b7c40c0 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 26 Jun 2025 11:26:41 +0200 Subject: [PATCH 055/109] ccu_pkg: remove unused struct typedef --- src/ccu/ace_ccu_pkg.sv | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/ccu/ace_ccu_pkg.sv b/src/ccu/ace_ccu_pkg.sv index 7b6673e..ba376e7 100644 --- a/src/ccu/ace_ccu_pkg.sv +++ b/src/ccu/ace_ccu_pkg.sv @@ -84,13 +84,4 @@ package ace_ccu_pkg; return p; endfunction - // Typedefs - - // CD ctrl structure - typedef struct packed { - logic read; - logic write; - logic drop; - } cd_sel_t; - endpackage From 176a7e3a1c7c218144db03f4be8e6487cabe2e27 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 26 Jun 2025 11:27:07 +0200 Subject: [PATCH 056/109] ccu_snoop_pipe: redefine stage 1 stall event --- src/ccu/ace_ccu_snoop_pipe.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu/ace_ccu_snoop_pipe.sv b/src/ccu/ace_ccu_snoop_pipe.sv index 75905c9..9297e10 100644 --- a/src/ccu/ace_ccu_snoop_pipe.sv +++ b/src/ccu/ace_ccu_snoop_pipe.sv @@ -338,7 +338,7 @@ module ace_ccu_snoop_pipe // Performance events // {{{ assign evt_st0_stall_o = st0_ax_valid && 
!st0_ax_ready; - assign evt_st1_stall_o = st1_valid && !st1_ready; + assign evt_st1_stall_o = st1_pipe_valid && !st1_pipe_ready; // }}} endmodule From ec2d9b86efb045188e3b3a0950297ba0d1f5a6ff Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 26 Jun 2025 17:10:10 +0200 Subject: [PATCH 057/109] ccu_cd_ctrl: fix `r_last` in read response from CD channel --- src/ccu/ace_ccu_cd_ctrl.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu/ace_ccu_cd_ctrl.sv b/src/ccu/ace_ccu_cd_ctrl.sv index 9e5d94e..069684d 100644 --- a/src/ccu/ace_ccu_cd_ctrl.sv +++ b/src/ccu/ace_ccu_cd_ctrl.sv @@ -222,7 +222,7 @@ module ace_ccu_cd_ctrl id: cd_ctrl_sync_rdata.id, data: cd.data, resp: {cd_ctrl_sync_rdata.r_resp_shared, cd_ctrl_sync_rdata.r_resp_dirty, 2'b0}, - last: cd.last, + last: r_last, user: cd_ctrl_sync_rdata.r_user }; // }}} From 64a0b986b74437874c47928b0967b701fac4fe87 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Sun, 29 Jun 2025 11:26:49 +0200 Subject: [PATCH 058/109] Bender.yml: update `axi` version --- Bender.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Bender.yml b/Bender.yml index e85fc31..ddb2ce4 100644 --- a/Bender.yml +++ b/Bender.yml @@ -5,7 +5,7 @@ package: - "Riccardo Tedeschi " dependencies: - axi: { git: "https://github.com/ricted98/axi.git", rev: 1fd96dec948da018e50a9d40bf78f59bb2f6cd19 } + axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.8 } common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.38.0 } axi_riscv_atomics: { git: "https://github.com/pulp-platform/axi_riscv_atomics.git", version: 0.8.2} From 1791b5b3baf337a7305cf61f3d895bcacca7436a Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 30 Jun 2025 14:50:38 +0200 Subject: [PATCH 059/109] scripts: update and clean up `.do` scripts --- scripts/tb_ace_ccu_top.do | 29 +++++++---------------------- scripts/tb_ccu_ctrl_r_snoop.do | 26 -------------------------- 
scripts/tb_ccu_ctrl_wr_snoop.do | 26 -------------------------- 3 files changed, 7 insertions(+), 74 deletions(-) delete mode 100644 scripts/tb_ccu_ctrl_r_snoop.do delete mode 100644 scripts/tb_ccu_ctrl_wr_snoop.do diff --git a/scripts/tb_ace_ccu_top.do b/scripts/tb_ace_ccu_top.do index df0467c..c6bc62e 100644 --- a/scripts/tb_ace_ccu_top.do +++ b/scripts/tb_ace_ccu_top.do @@ -5,37 +5,22 @@ log -class cache_test_pkg::cache_scoreboard::cache_scoreboard__1 do snoop_types.do # Figure out number of masters from number of ACE interfaces -set n_masters [llength [find instances sim:/tb_ace_ccu_top/ace_intf*]] - -# number of snoop blocks -set n_snoops [llength [find blocks sim:/tb_ace_ccu_top/ccu/i_ace_ccu_top/i_master_path/gen_snoop*]] +set n_masters [examine -radix unsigned sim:/tb_ace_ccu_top/TbNumMst] add wave -divider "Clock and Reset" -add wave sim:/tb_ace_ccu_top/ccu/clk_i -add wave sim:/tb_ace_ccu_top/ccu/rst_ni +add wave sim:/tb_ace_ccu_top/clk +add wave sim:/tb_ace_ccu_top/rst_n add wave -divider "Towards memory" -add wave sim:/tb_ace_ccu_top/ccu/mst_req -add wave sim:/tb_ace_ccu_top/ccu/mst_resp +add wave sim:/tb_ace_ccu_top/axi_intf/* for {set n 0} {$n < $n_masters} {incr n 1} { add wave -divider "Towards cached master m$n" - add wave sim:/tb_ace_ccu_top/ccu/slv_reqs[$n] - add wave sim:/tb_ace_ccu_top/ccu/slv_resps[$n] + add wave sim:/tb_ace_ccu_top/ace_intf[$n]/* add wave -divider "Towards snooped cache m$n" - add wave sim:/tb_ace_ccu_top/ccu/snoop_reqs[$n] - add wave sim:/tb_ace_ccu_top/ccu/snoop_resps[$n] - - radix signal sim:/tb_ace_ccu_top/ccu/slv_reqs[$n].aw.snoop WriteSnoop - radix signal sim:/tb_ace_ccu_top/ccu/slv_reqs[$n].ar.snoop ReadSnoop -} - -for {set n 0} {$n < $n_snoops} {incr n 1} { - add wave -divider "FSM State $n" - add wave -label r_fsm sim:/tb_ace_ccu_top/ccu/i_ace_ccu_top/i_master_path/gen_snoop[$n]/i_snoop_path/i_ccu_ctrl_r_snoop/fsm_state_q - add wave -label wr_fsm 
sim:/tb_ace_ccu_top/ccu/i_ace_ccu_top/i_master_path/gen_snoop[$n]/i_snoop_path/i_ccu_ctrl_wr_snoop/fsm_state_q + add wave sim:/tb_ace_ccu_top/snoop_intf[$n]/* } onfinish stop run -all -view wave \ No newline at end of file +view wave diff --git a/scripts/tb_ccu_ctrl_r_snoop.do b/scripts/tb_ccu_ctrl_r_snoop.do deleted file mode 100644 index c5fc86b..0000000 --- a/scripts/tb_ccu_ctrl_r_snoop.do +++ /dev/null @@ -1,26 +0,0 @@ -configure wave -signalnamewidth 1 - -do snoop_types.do - -add wave -divider "Clock and Reset" -add wave sim:/tb_ccu_ctrl_r_snoop/DUT/clk_i -add wave sim:/tb_ccu_ctrl_r_snoop/DUT/rst_ni -add wave -divider "FSM State" -add wave sim:/tb_ccu_ctrl_r_snoop/DUT/fsm_state_q -add wave -divider "Towards cached master" -add wave sim:/tb_ccu_ctrl_r_snoop/DUT/slv_req_i -add wave sim:/tb_ccu_ctrl_r_snoop/DUT/slv_resp_o -add wave -divider "Towards memory" -add wave sim:/tb_ccu_ctrl_r_snoop/DUT/mst_req_o -add wave sim:/tb_ccu_ctrl_r_snoop/DUT/mst_resp_i -add wave -divider "Towards snooped cache" -add wave sim:/tb_ccu_ctrl_r_snoop/DUT/snoop_req_o -add wave sim:/tb_ccu_ctrl_r_snoop/DUT/snoop_resp_i - -radix signal sim:/tb_ccu_ctrl_r_snoop/DUT/slv_req_i.aw.snoop WriteSnoop -radix signal sim:/tb_ccu_ctrl_r_snoop/DUT/slv_req_i.ar.snoop ReadSnoop - -log -r * -onfinish stop -run -all -view wave \ No newline at end of file diff --git a/scripts/tb_ccu_ctrl_wr_snoop.do b/scripts/tb_ccu_ctrl_wr_snoop.do deleted file mode 100644 index 9064f82..0000000 --- a/scripts/tb_ccu_ctrl_wr_snoop.do +++ /dev/null @@ -1,26 +0,0 @@ -configure wave -signalnamewidth 1 - -do snoop_types.do - -add wave -divider "Clock and Reset" -add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/clk_i -add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/rst_ni -add wave -divider "FSM State" -add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/fsm_state_q -add wave -divider "Towards cached master" -add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/slv_req_i -add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/slv_resp_o -add wave -divider "Towards memory" -add wave 
sim:/tb_ccu_ctrl_wr_snoop/DUT/mst_req_o -add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/mst_resp_i -add wave -divider "Towards snooped cache" -add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/snoop_req_o -add wave sim:/tb_ccu_ctrl_wr_snoop/DUT/snoop_resp_i - -radix signal sim:/tb_ccu_ctrl_wr_snoop/DUT/slv_req_i.aw.snoop WriteSnoop -radix signal sim:/tb_ccu_ctrl_wr_snoop/DUT/slv_req_i.ar.snoop ReadSnoop - -log -r * -onfinish stop -run -all -view wave From 9ba9d805b6346227ea14e28fe8b11ab4c930fac9 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 23 Jul 2025 10:17:51 +0200 Subject: [PATCH 060/109] ace_pkg: remove unused type and add RRESP encoding --- src/ace_pkg.sv | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/ace_pkg.sv b/src/ace_pkg.sv index be21130..d4b7c75 100644 --- a/src/ace_pkg.sv +++ b/src/ace_pkg.sv @@ -13,6 +13,7 @@ //! ACE Package /// Contains all necessary type definitions, constants, and generally useful functions. +/// Naming conventions are chosen to align with the ACE specification. 
package ace_pkg; ////////////// @@ -39,13 +40,6 @@ package ace_pkg; logic DataTransfer; } crresp_t; - typedef struct packed { - acsnoop_t snoop_trs; - logic accepts_dirty; - logic accepts_dirty_shared; - logic accepts_shared; - } snoop_info_t; - /////////////// // Encodings // /////////////// @@ -107,6 +101,11 @@ package ace_pkg; // - DVMMessage // Cast the parameters to acsnoop_t for consistency (but works anyway) + // RRESP + // Bit position for additional ACE-related fields + localparam int unsigned RESP_IS_DIRTY = 2; + localparam int unsigned RESP_IS_SHARED = 3; + /////////////// // Functions // /////////////// From 5f8c4bada74b8813dcc153c35b3b5f59d96066cc Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 23 Jul 2025 10:18:29 +0200 Subject: [PATCH 061/109] ccu_cd_ctrl: use RRESP parameters to abstract encoding --- src/ccu/ace_ccu_cd_ctrl.sv | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/ccu/ace_ccu_cd_ctrl.sv b/src/ccu/ace_ccu_cd_ctrl.sv index 069684d..d9473af 100644 --- a/src/ccu/ace_ccu_cd_ctrl.sv +++ b/src/ccu/ace_ccu_cd_ctrl.sv @@ -89,6 +89,7 @@ module ace_ccu_cd_ctrl logic r_len_cnt_clr; logic r_len_cnt_en; axi_pkg::len_t r_len_cnt; + rresp_t r_resp; // }}} // Input handshake decoupling @@ -218,10 +219,16 @@ module ace_ccu_cd_ctrl assign r_done_d = !r_len_cnt_clr && ((r_last && r_len_cnt_en) || r_done_q); + always_comb begin : rresp_comb + r_resp = '0; + r_resp[RESP_IS_DIRTY] = cd_ctrl_sync_rdata.r_resp_dirty; + r_resp[RESP_IS_SHARED] = cd_ctrl_sync_rdata.r_resp_shared; + end + assign r_o = '{ id: cd_ctrl_sync_rdata.id, data: cd.data, - resp: {cd_ctrl_sync_rdata.r_resp_shared, cd_ctrl_sync_rdata.r_resp_dirty, 2'b0}, + resp: r_resp, last: r_last, user: cd_ctrl_sync_rdata.r_user }; From 6fb49d2e54798f90fb1b99cfe967cde0ffe18f2f Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 23 Jul 2025 10:19:10 +0200 Subject: [PATCH 062/109] ccu: add assertions --- src/ccu/ace_ccu_cd_ctrl.sv | 20 ++++++++++++++++++++ 
src/ccu/ace_ccu_snoop_pipe.sv | 7 +++++++ 2 files changed, 27 insertions(+) diff --git a/src/ccu/ace_ccu_cd_ctrl.sv b/src/ccu/ace_ccu_cd_ctrl.sv index d9473af..4df51c7 100644 --- a/src/ccu/ace_ccu_cd_ctrl.sv +++ b/src/ccu/ace_ccu_cd_ctrl.sv @@ -239,4 +239,24 @@ module ace_ccu_cd_ctrl assign w_o = '{data: cd.data, strb: '1, last: cd.last, user: '0}; // }}} + // Assertions + // {{{ + + // If r_done_q is high, r_valid_o should never be raised + assert property (@(posedge clk_i) disable iff (!rst_ni) r_done_q |-> !r_valid_o); + // If r_drop is true, r_valid_o should never be raised + assert property (@(posedge clk_i) disable iff (!rst_ni) r_drop |-> !r_valid_o); + // If r_last is true, r_o.last should be raised + assert property (@(posedge clk_i) disable iff (!rst_ni) r_last |-> r_o.last); + // r_o.last can only be high if r_last is high + assert property (@(posedge clk_i) disable iff (!rst_ni) r_valid_o && r_o.last |-> r_last); + // r_valid_o should not be raised if not in read mode + assert property (@(posedge clk_i) disable iff (!rst_ni) !cd_ctrl_sync_rdata.cd_ctrl_read |-> !r_valid_o); + // r_valid_o should not be raised if cd_sel_read is not asserted + assert property (@(posedge clk_i) disable iff (!rst_ni) !cd_sel_read |-> !r_valid_o); + // w_valid_o should not be raised if cd_sel_write is not asserted + assert property (@(posedge clk_i) disable iff (!rst_ni) !cd_sel_write |-> !w_valid_o); + + // }}} + endmodule diff --git a/src/ccu/ace_ccu_snoop_pipe.sv b/src/ccu/ace_ccu_snoop_pipe.sv index 9297e10..ab8dad8 100644 --- a/src/ccu/ace_ccu_snoop_pipe.sv +++ b/src/ccu/ace_ccu_snoop_pipe.sv @@ -341,4 +341,11 @@ module ace_ccu_snoop_pipe assign evt_st1_stall_o = st1_pipe_valid && !st1_pipe_ready; // }}} + // Assertions + // {{{ + + // initiator bit in st0_ac_bv_o should never be set to 1 + assert property (@(posedge clk_i) disable iff (!rst_ni) st0_ac_bv_o[st0_slv_idx] == 1'b0); + // }}} + endmodule From d40546212acfa6dd6214b6985c55edaf81a363c8 Mon Sep 17 00:00:00 2001 
From: Riccardo Tedeschi Date: Wed, 23 Jul 2025 10:37:57 +0200 Subject: [PATCH 063/109] ccu_cd_ctrl: fix typo --- src/ccu/ace_ccu_cd_ctrl.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu/ace_ccu_cd_ctrl.sv b/src/ccu/ace_ccu_cd_ctrl.sv index 4df51c7..a5a0a36 100644 --- a/src/ccu/ace_ccu_cd_ctrl.sv +++ b/src/ccu/ace_ccu_cd_ctrl.sv @@ -96,7 +96,7 @@ module ace_ccu_cd_ctrl // {{{ if (CcuCfg.CachelineAxiTransfers == 1) begin : gen_axi_start_trans_eqsize assign r_cd_start_trans = '0; - end else begin : gen_axi_start_trans_eqsize + end else begin : gen_axi_start_trans_diffsize assign r_cd_start_trans = ax_i.addr[CcuCfg.CachelineBytesIdxWidth-1:CcuCfg.AxiDataBytesIdxWidth]; end From 8a516739d39c7fd362ec79e5a84c658da03afd9c Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 24 Jul 2025 09:50:30 +0200 Subject: [PATCH 064/109] Bender.yml: update `axi` dependency to custom branch --- Bender.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Bender.yml b/Bender.yml index ddb2ce4..e9b1623 100644 --- a/Bender.yml +++ b/Bender.yml @@ -5,7 +5,7 @@ package: - "Riccardo Tedeschi " dependencies: - axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.8 } + axi: { git: "https://github.com/pulp-platform/axi.git", rev: 37fa3a93 } # branch: rt/ace common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.38.0 } axi_riscv_atomics: { git: "https://github.com/pulp-platform/axi_riscv_atomics.git", version: 0.8.2} From 3028979360b667878cd5d095c965062c31eaeec5 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 24 Jul 2025 12:26:23 +0200 Subject: [PATCH 065/109] ccu_cd_ctrl: use `LenWidth` parameter from `axi_pkg` --- src/ccu/ace_ccu_cd_ctrl.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu/ace_ccu_cd_ctrl.sv b/src/ccu/ace_ccu_cd_ctrl.sv index a5a0a36..cdf5e9e 100644 --- a/src/ccu/ace_ccu_cd_ctrl.sv +++ b/src/ccu/ace_ccu_cd_ctrl.sv @@ -191,7 +191,7 @@ module 
ace_ccu_cd_ctrl ); counter #( - .WIDTH($bits(axi_pkg::len_t)) + .WIDTH(axi_pkg::LenWidth) ) u_r_len_counter ( .clk_i, .rst_ni, From 5aae0d2326adeb4da1f63623d9f9dcb3212333eb Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 28 Jul 2025 10:03:04 +0200 Subject: [PATCH 066/109] license: fix headers --- src/ace_cut.sv | 4 ++-- src/ace_intf.sv | 1 + src/ace_pkg.sv | 1 + src/ace_snoop_cut.sv | 2 +- src/snoop_intf.sv | 1 + 5 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/ace_cut.sv b/src/ace_cut.sv index 0eca0f2..47050af 100644 --- a/src/ace_cut.sv +++ b/src/ace_cut.sv @@ -1,4 +1,4 @@ -// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// Copyright (c) 2014-2025 ETH Zurich, University of Bologna // // Copyright and related rights are licensed under the Solderpad Hardware // License, Version 0.51 (the "License"); you may not use this file except in @@ -13,7 +13,7 @@ // - Wolfgang Roenninger // - Fabian Schuiki // - Andreas Kurth -// - Andrea Tedeschi +// - Riccardo Tedeschi /// An ACE4 cut. 
/// diff --git a/src/ace_intf.sv b/src/ace_intf.sv index 68ed0b3..2b27407 100644 --- a/src/ace_intf.sv +++ b/src/ace_intf.sv @@ -1,5 +1,6 @@ // Copyright (c) 2014-2018 ETH Zurich, University of Bologna // Copyright (c) 2022 PlanV GmbH +// Copyright (c) 2025 ETH Zurich, University of Bologna // // Copyright and related rights are licensed under the Solderpad Hardware // License, Version 0.51 (the "License"); you may not use this file except in diff --git a/src/ace_pkg.sv b/src/ace_pkg.sv index d4b7c75..8996758 100644 --- a/src/ace_pkg.sv +++ b/src/ace_pkg.sv @@ -1,5 +1,6 @@ // Copyright (c) 2014-2018 ETH Zurich, University of Bologna // Copyright (c) 2022 PlanV GmbH +// Copyright (c) 2025 ETH Zurich, University of Bologna // // Copyright and related rights are licensed under the Solderpad Hardware // License, Version 0.51 (the "License"); you may not use this file except in diff --git a/src/ace_snoop_cut.sv b/src/ace_snoop_cut.sv index 3ea54d8..62c9827 100644 --- a/src/ace_snoop_cut.sv +++ b/src/ace_snoop_cut.sv @@ -1,4 +1,4 @@ -// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// Copyright (c) 2014-2025 ETH Zurich, University of Bologna // // Copyright and related rights are licensed under the Solderpad Hardware // License, Version 0.51 (the "License"); you may not use this file except in diff --git a/src/snoop_intf.sv b/src/snoop_intf.sv index 985907f..11cc5bc 100644 --- a/src/snoop_intf.sv +++ b/src/snoop_intf.sv @@ -1,5 +1,6 @@ // Copyright (c) 2014-2018 ETH Zurich, University of Bologna // Copyright (c) 2022 PlanV GmbH +// Copyright (c) 2025 ETH Zurich, University of Bologna // // Copyright and related rights are licensed under the Solderpad Hardware // License, Version 0.51 (the "License"); you may not use this file except in From 9177971957c5e1642251b05f2a5c7f6d8486e8c8 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 1 Aug 2025 18:54:22 +0200 Subject: [PATCH 067/109] include: add custom type and `__ACE_NO_ACKS` defines --- 
include/ace/assign.svh | 12 ++++++-- include/ace/typedef.svh | 65 ++++++++++++++++++++++++++++++----------- 2 files changed, 57 insertions(+), 20 deletions(-) diff --git a/include/ace/assign.svh b/include/ace/assign.svh index d37943c..e47d02c 100644 --- a/include/ace/assign.svh +++ b/include/ace/assign.svh @@ -72,8 +72,10 @@ `__ACE_TO_AR(__opt_as, __lhs.ar, __lhs_sep, __rhs.ar, __rhs_sep) \ __opt_as __lhs.ar_valid = __rhs.ar_valid; \ __opt_as __lhs.r_ready = __rhs.r_ready; \ + `ifndef __ACE_NO_ACKS \ __opt_as __lhs.wack = __rhs.wack; \ - __opt_as __lhs.rack = __rhs.rack; + __opt_as __lhs.rack = __rhs.rack; \ + `endif `define __ACE_TO_RESP(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ __opt_as __lhs.aw_ready = __rhs.aw_ready; \ __opt_as __lhs.ar_ready = __rhs.ar_ready; \ @@ -117,8 +119,10 @@ `AXI_ASSIGN_B(mst, slv) \ `ACE_ASSIGN_AR(slv, mst) \ `ACE_ASSIGN_R(mst, slv) \ + `ifndef __ACE_NO_ACKS \ assign slv.wack = mst.wack; \ - assign slv.rack = mst.rack; + assign slv.rack = mst.rack; \ + `endif //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -147,8 +151,10 @@ `__ACE_TO_R(assign, mon_dv.r, _, axi_if.r, _) \ assign mon_dv.r_valid = axi_if.r_valid; \ assign mon_dv.r_ready = axi_if.r_ready; \ + `ifndef __ACE_NO_ACKS \ assign mon_dv.wack = axi_if.wack; \ - assign mon_dv.rack = axi_if.rack; + assign mon_dv.rack = axi_if.rack; \ + `endif //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/include/ace/typedef.svh b/include/ace/typedef.svh index e877835..72e1740 100644 --- a/include/ace/typedef.svh +++ b/include/ace/typedef.svh @@ -19,7 +19,7 @@ `include "axi/typedef.svh" //////////////////////////////////////////////////////////////////////////////////////////////////// -// AXI4+ATOP Channel and Request/Response Structs (with snoop support) +// ACE4+ATOP Channel and Request/Response Structs // // Usage Example: // `ACE_TYPEDEF_AW_CHAN_T(axi_aw_t, 
axi_addr_t, axi_id_t, axi_user_t) @@ -73,16 +73,18 @@ } r_chan_t; `define ACE_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) \ typedef struct packed { \ - aw_chan_t aw; \ + aw_chan_t aw; \ logic aw_valid; \ w_chan_t w; \ logic w_valid; \ logic b_ready; \ - ar_chan_t ar; \ + ar_chan_t ar; \ logic ar_valid; \ logic r_ready; \ + `ifndef __ACE_NO_ACKS \ logic wack; \ logic rack; \ + `endif \ } req_t; `define ACE_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) \ typedef struct packed { \ @@ -97,25 +99,43 @@ //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// -// All AXI4+ATOP Channels and Request/Response Structs in One Macro (with snoop support) +// All ACE4+ATOP Channels and Request/Response Structs in One Macro - Custom Type Name Version +// +// This can be used whenever the user is interested in "precise" control of the naming of the +// individual channels. +// +// Usage Example: +// `ACE_TYPEDEF_ALL_CT(axi, axi_req_t, axi_rsp_t, addr_t, id_t, data_t, strb_t, user_t) +// +// This defines `axi_req_t` and `axi_rsp_t` request/response structs as well as `axi_aw_chan_t`, +// `axi_w_chan_t`, `axi_b_chan_t`, `axi_ar_chan_t`, and `axi_r_chan_t` channel structs. 
+`define ACE_TYPEDEF_ALL_CT(__name, __req, __rsp, __addr_t, __id_t, __data_t, __strb_t, __user_t) \ + `ACE_TYPEDEF_AW_CHAN_T(__name``_aw_chan_t, __addr_t, __id_t, __user_t) \ + `AXI_TYPEDEF_W_CHAN_T(__name``_w_chan_t, __data_t, __strb_t, __user_t) \ + `AXI_TYPEDEF_B_CHAN_T(__name``_b_chan_t, __id_t, __user_t) \ + `ACE_TYPEDEF_AR_CHAN_T(__name``_ar_chan_t, __addr_t, __id_t, __user_t) \ + `ACE_TYPEDEF_R_CHAN_T(__name``_r_chan_t, __data_t, __id_t, __user_t) \ + `ACE_TYPEDEF_REQ_T(__req, __name``_aw_chan_t, __name``_w_chan_t, __name``_ar_chan_t) \ + `ACE_TYPEDEF_RESP_T(__rsp, __name``_b_chan_t, __name``_r_chan_t) +//////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// All ACE4+ATOP Channels and Request/Response Structs in One Macro // // This can be used whenever the user is not interested in "precise" control of the naming of the // individual channels. // // Usage Example: -// `AXI_TYPEDEF_ALL(axi, addr_t, id_t, data_t, strb_t, user_t) +// `ACE_TYPEDEF_ALL(axi, addr_t, id_t, data_t, strb_t, user_t) // // This defines `axi_req_t` and `axi_resp_t` request/response structs as well as `axi_aw_chan_t`, // `axi_w_chan_t`, `axi_b_chan_t`, `axi_ar_chan_t`, and `axi_r_chan_t` channel structs. 
-`define ACE_TYPEDEF_ALL(__name, __addr_t, __id_t, __data_t, __strb_t, __user_t) \ - `ACE_TYPEDEF_AW_CHAN_T(__name``_aw_chan_t, __addr_t, __id_t, __user_t) \ - `AXI_TYPEDEF_W_CHAN_T(__name``_w_chan_t, __data_t, __strb_t, __user_t) \ - `AXI_TYPEDEF_B_CHAN_T(__name``_b_chan_t, __id_t, __user_t) \ - `ACE_TYPEDEF_AR_CHAN_T(__name``_ar_chan_t, __addr_t, __id_t, __user_t) \ - `ACE_TYPEDEF_R_CHAN_T(__name``_r_chan_t, __data_t, __id_t, __user_t) \ - `ACE_TYPEDEF_REQ_T(__name``_req_t, __name``_aw_chan_t, __name``_w_chan_t, __name``_ar_chan_t) \ - `ACE_TYPEDEF_RESP_T(__name``_resp_t, __name``_b_chan_t, __name``_r_chan_t) +`define ACE_TYPEDEF_ALL(__name, __addr_t, __id_t, __data_t, __strb_t, __user_t) \ + `ACE_TYPEDEF_ALL_CT(__name, __name``_req_t, __name``_resp_t, __addr_t, __id_t, __data_t, __strb_t, __user_t) //////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// ACE4 Snoop Channel and Request/Response Structs // Usage Example: // `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, snoop_addr_t) // 'SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, snoop_data_t) @@ -151,17 +171,28 @@ } resp_t; //////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// // Usage Example: -// `SNOOP_TYPEDEF_ALL(snoop, addr_t, data_t) +// `SNOOP_TYPEDEF_ALL_CT(snoop, snoop_req_t, snoop_resp_t, addr_t, data_t) // // This defines `snoop_req_t` and `snoop_resp_t` request/response structs as well as `snoop_ac_chan_t`, // `snoop_cd_chan_t` and `snoop_cr_chan_t` channel structs.
- `define SNOOP_TYPEDEF_ALL(__name, __addr_t, __data_t) \ + `define SNOOP_TYPEDEF_ALL_CT(__name, __req, __rsp, __addr_t, __data_t) \ `SNOOP_TYPEDEF_AC_CHAN_T(__name``_ac_chan_t, __addr_t) \ `SNOOP_TYPEDEF_CR_CHAN_T(__name``_cr_chan_t) \ `SNOOP_TYPEDEF_CD_CHAN_T(__name``_cd_chan_t, __data_t) \ - `SNOOP_TYPEDEF_REQ_T(__name``_req_t, __name``_ac_chan_t) \ - `SNOOP_TYPEDEF_RESP_T(__name``_resp_t, __name``_cd_chan_t, __name``_cr_chan_t) + `SNOOP_TYPEDEF_REQ_T(__req, __name``_ac_chan_t) \ + `SNOOP_TYPEDEF_RESP_T(__rsp, __name``_cd_chan_t, __name``_cr_chan_t) +//////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Usage Example: +// `SNOOP_TYPEDEF_ALL(snoop, addr_t, data_t) +// +// This defines `snoop_req_t` and `snoop_resp_t` request/response structs as well as `snoop_ac_chan_t`, +// `snoop_cd_chan_t` and `snoop_cr_chan_t` channel structs. + `define SNOOP_TYPEDEF_ALL(__name, __addr_t, __data_t) \ + `SNOOP_TYPEDEF_ALL_CT(__name, __name``_req_t, __name``_resp_t, __addr_t, __data_t) //////////////////////////////////////////////////////////////////////////////////////////////////// `endif From caecef15ef4aeda9b3cbcbb8696e0a185a009481 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 1 Aug 2025 18:57:42 +0200 Subject: [PATCH 068/109] src: add ACE mux and demux modules --- Bender.yml | 2 + src/ace_demux_simple.sv | 160 +++++++++++++++ src/ace_mux.sv | 424 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 586 insertions(+) create mode 100644 src/ace_demux_simple.sv create mode 100644 src/ace_mux.sv diff --git a/Bender.yml b/Bender.yml index e9b1623..9e755a2 100644 --- a/Bender.yml +++ b/Bender.yml @@ -24,6 +24,8 @@ sources: # ACE ips - src/ace_cut.sv - src/ace_snoop_cut.sv + - src/ace_demux_simple.sv + - src/ace_mux.sv # CCU src files - src/ccu/ace_ccu_pkg.sv - src/ccu/ace_ccu_ax_arbiter.sv diff --git 
a/src/ace_demux_simple.sv b/src/ace_demux_simple.sv new file mode 100644 index 0000000..b429729 --- /dev/null +++ b/src/ace_demux_simple.sv @@ -0,0 +1,160 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Riccardo Tedeschi + +`include "ace/typedef.svh" +`include "ace/assign.svh" + +module ace_demux_simple #( + parameter int unsigned AxiIdWidth = 32'd0, + parameter bit AtopSupport = 1'b1, + parameter type aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type ar_chan_t = logic, + parameter type req_t = logic, + parameter type resp_t = logic, + parameter int unsigned NoMstPorts = 32'd0, + parameter int unsigned MaxTrans = 32'd8, + parameter int unsigned AxiLookBits = 32'd3, + parameter bit UniqueIds = 1'b0, + localparam int unsigned SelectWidth = (NoMstPorts > 32'd1) ? 
$clog2(NoMstPorts) : 32'd1, + localparam type select_t = logic [SelectWidth-1:0] +) ( + input logic clk_i, + input logic rst_ni, + input logic test_i, + // Slave Port + input req_t slv_req_i, + input select_t slv_aw_select_i, + input select_t slv_ar_select_i, + output resp_t slv_resp_o, + // Master Ports + output req_t [NoMstPorts-1:0] mst_reqs_o, + input resp_t [NoMstPorts-1:0] mst_resps_i +); + + // All subsequent ACE defines will not use RACK/WACKS + `define __ACE_NO_ACKS + + // ACE request structure without RACK and WACK + `ACE_TYPEDEF_REQ_T(__req_t, aw_chan_t, w_chan_t, ar_chan_t) + + __req_t slv_req; + __req_t [NoMstPorts-1:0] mst_reqs; + resp_t [NoMstPorts-1:0] mst_resps; + + select_t mst_b_idx; + select_t mst_r_idx; + select_t wack_idx; + select_t rack_idx; + + logic [NoMstPorts-1:0] mst_wacks, mst_racks; + logic mst_b_stall, mst_r_stall; // xACK FIFO full flags, declared explicitly instead of relying on implicit nets + + `ACE_ASSIGN_REQ_STRUCT(slv_req, slv_req_i) + + // AXI demux simple instance + // {{{ + axi_demux_simple #( + .AxiIdWidth (AxiIdWidth), + .AtopSupport(AtopSupport), + .axi_req_t (__req_t), + .axi_resp_t (resp_t), + .NoMstPorts (NoMstPorts), + .MaxTrans (MaxTrans), + .AxiLookBits(AxiLookBits), + .UniqueIds (UniqueIds) + ) u_axi_demux ( + .clk_i, + .rst_ni, + .test_i (test_i), + .slv_req_i (slv_req), + .slv_resp_o (slv_resp_o), + .slv_aw_select_i(slv_aw_select_i), + .slv_ar_select_i(slv_ar_select_i), + .mst_reqs_o (mst_reqs), + .mst_resps_i (mst_resps), + .mst_b_idx_o (mst_b_idx), + .mst_r_idx_o (mst_r_idx) + ); + // }}} + + // xACKs generation + // {{{ + fifo_v3 #( + .FALL_THROUGH(1'b0), + .DEPTH (MaxTrans), + .dtype (select_t) + ) i_switch_w_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (mst_b_stall), + .empty_o (), + .usage_o (), + .data_i (mst_b_idx), + .push_i (slv_resp_o.b_valid && slv_req_i.b_ready), + .data_o (wack_idx), + .pop_i (slv_req_i.wack) + ); + + fifo_v3 #( + .FALL_THROUGH(1'b0), + .DEPTH (MaxTrans), + .dtype (select_t) + ) i_switch_r_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), +
.testmode_i(1'b0), + .full_o (mst_r_stall), + .empty_o (), + .usage_o (), + .data_i (mst_r_idx), + .push_i (slv_resp_o.r_valid && slv_req_i.r_ready && slv_resp_o.r.last), + .data_o (rack_idx), + .pop_i (slv_req_i.rack) + ); + + always_comb begin + mst_wacks = '0; + mst_racks = '0; + + if (slv_req_i.rack) mst_racks[rack_idx] = 1'b1; + if (slv_req_i.wack) mst_wacks[wack_idx] = 1'b1; + end + + always_comb begin + for (int i = 0; i < NoMstPorts; i++) begin + `ACE_SET_REQ_STRUCT(mst_reqs_o[i], mst_reqs[i]) + `ACE_SET_RESP_STRUCT(mst_resps[i], mst_resps_i[i]) + mst_reqs_o[i].rack = mst_racks[i]; + mst_reqs_o[i].wack = mst_wacks[i]; + + if (mst_r_stall) begin + mst_resps[i].r_valid = 1'b0; + mst_reqs_o[i].r_ready = 1'b0; + end + + if (mst_b_stall) begin + mst_resps[i].b_valid = 1'b0; + mst_reqs_o[i].b_ready = 1'b0; + end + end + end + // }}} + + `undef __ACE_NO_ACKS + +endmodule diff --git a/src/ace_mux.sv b/src/ace_mux.sv new file mode 100644 index 0000000..edc11ba --- /dev/null +++ b/src/ace_mux.sv @@ -0,0 +1,424 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Authors: +// - Riccardo Tedeschi + +`include "ace/typedef.svh" +`include "ace/assign.svh" + +module ace_mux #( + // ACE parameter and channel types + parameter int unsigned SlvAxiIDWidth = 32'd0, // AXI ID width, slave ports + parameter type slv_aw_chan_t = logic, // AW Channel Type, slave ports + parameter type mst_aw_chan_t = logic, // AW Channel Type, master port + parameter type w_chan_t = logic, // W Channel Type, all ports + parameter type slv_b_chan_t = logic, // B Channel Type, slave ports + parameter type mst_b_chan_t = logic, // B Channel Type, master port + parameter type slv_ar_chan_t = logic, // AR Channel Type, slave ports + parameter type mst_ar_chan_t = logic, // AR Channel Type, master port + parameter type slv_r_chan_t = logic, // R Channel Type, slave ports + parameter type mst_r_chan_t = logic, // R Channel Type, master port + parameter type slv_req_t = logic, // Slave port request type + parameter type slv_resp_t = logic, // Slave port response type + parameter type mst_req_t = logic, // Master ports request type + parameter type mst_resp_t = logic, // Master ports response type + parameter int unsigned NoSlvPorts = 32'd0, // Number of slave ports + // Maximum number of outstanding transactions per write + parameter int unsigned MaxWTrans = 32'd8, + // Maximum number of outstanding transactions per B channel (ACE) + parameter int unsigned MaxBTrans = 32'd8, + // Maximum number of outstanding transactions per R channel (ACE) + parameter int unsigned MaxRTrans = 32'd8, + // If enabled, this multiplexer is purely combinatorial + parameter bit FallThrough = 1'b0, + // add spill register on write master ports, adds a cycle latency on write channels + parameter bit SpillAw = 1'b1, + parameter bit SpillW = 1'b0, + parameter bit SpillB = 1'b0, + // add spill register on read master ports, adds a cycle latency on read channels + parameter bit SpillAr = 1'b1, + parameter bit SpillR = 1'b0, + // add registers on xACK ports, add a cycle latency on 
acknowledgment signals (ACE) + parameter bit RegAck = 1'b0 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_i, // Test Mode enable + // slave ports (ACE inputs), connect master modules here + input slv_req_t [NoSlvPorts-1:0] slv_reqs_i, + output slv_resp_t [NoSlvPorts-1:0] slv_resps_o, + // master port (ACE outputs), connect slave modules here + output mst_req_t mst_req_o, + input mst_resp_t mst_resp_i +); + // All subsequent ACE defines will not use RACK/WACKS + `define __ACE_NO_ACKS + + // Internal request type without acks + `ACE_TYPEDEF_REQ_T(__slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t) + `ACE_TYPEDEF_REQ_T(__mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t) + + __slv_req_t [NoSlvPorts-1:0] slv_reqs; + + __mst_req_t mst_req; + mst_resp_t mst_resp; + + // Input req setup + // {{{ + for (genvar i = 0; i < NoSlvPorts; i++) begin + `ACE_ASSIGN_REQ_STRUCT(slv_reqs[i], slv_reqs_i[i]) + end + // }}} + + // AXI MUX instance + // {{{ + axi_mux #( + .SlvAxiIDWidth(SlvAxiIDWidth), + .slv_aw_chan_t(slv_aw_chan_t), + .mst_aw_chan_t(mst_aw_chan_t), + .w_chan_t (w_chan_t), + .slv_b_chan_t (slv_b_chan_t), + .mst_b_chan_t (mst_b_chan_t), + .slv_ar_chan_t(slv_ar_chan_t), + .mst_ar_chan_t(mst_ar_chan_t), + .slv_r_chan_t (slv_r_chan_t), + .mst_r_chan_t (mst_r_chan_t), + .slv_req_t (__slv_req_t), + .slv_resp_t (slv_resp_t), + .mst_req_t (__mst_req_t), + .mst_resp_t (mst_resp_t), + .NoSlvPorts (NoSlvPorts), + .MaxWTrans (MaxWTrans), + .FallThrough (FallThrough), + .SpillAw (SpillAw), + .SpillW (SpillW), + .SpillB (SpillB), + .SpillAr (SpillAr), + .SpillR (SpillR) + ) u_axi_mux ( + .clk_i, + .rst_ni, + .test_i (test_i), + .slv_reqs_i (slv_reqs), + .slv_resps_o(slv_resps_o), + .mst_req_o (mst_req), + .mst_resp_i (mst_resp) + ); + + // }}} + + // Output req/resp setup + // {{{ + logic [NoSlvPorts-1:0] slv_racks; + logic [NoSlvPorts-1:0] slv_wacks; + logic mst_rack; + logic mst_wack; + logic mst_b_stall; + logic 
mst_r_stall; + + for (genvar i = 0; i < NoSlvPorts; i++) begin + assign slv_racks[i] = slv_reqs_i[i].rack; + assign slv_wacks[i] = slv_reqs_i[i].wack; + end + + always_comb begin + // Use AXI defines since we are working with + // the AXI backward compatible req structure without xACKs + `ACE_SET_REQ_STRUCT(mst_req_o, mst_req) + `ACE_SET_RESP_STRUCT(mst_resp, mst_resp_i) + // Get the xACKs from the dedicated logic instead + mst_req_o.wack = mst_wack; + mst_req_o.rack = mst_rack; + + // Stall B if the WACK ROB is full + if (mst_b_stall) begin + mst_resp.b_valid = 1'b0; + mst_req_o.b_ready = 1'b0; + end + + // Stall R if the RACK ROB is full + if (mst_r_stall) begin + mst_resp.r_valid = 1'b0; + mst_req_o.r_ready = 1'b0; + end + end + // }}} + + // xACKs generation + // {{{ + if (NoSlvPorts > 1) begin : gen_xack_rob + localparam int unsigned MstIdxBits = $clog2(NoSlvPorts); + typedef logic [MstIdxBits-1:0] switch_id_t; + + switch_id_t switch_b_id; + switch_id_t switch_r_id; + + logic r_last_handshake; + logic b_handshake; + + assign switch_r_id = mst_resp.r.id[SlvAxiIDWidth+:MstIdxBits]; + assign switch_b_id = mst_resp.b.id[SlvAxiIDWidth+:MstIdxBits]; + + assign r_last_handshake = mst_req_o.r_ready && mst_resp_i.r_valid && mst_resp_i.r.last; + assign b_handshake = mst_req_o.b_ready && mst_resp_i.b_valid; + + ace_mux_xack #( + .N (NoSlvPorts), + .MAX_OUTSTANDING(MaxBTrans), + .FALL_THROUGH (!RegAck) + ) u_wack_gen ( + .clk_i, + .rst_ni, + .empty_o ( /* unused */), + .full_o (mst_b_stall), + .handshake_i(b_handshake), + .idx_i (switch_b_id), + .acks_i (slv_wacks), + .ack_o (mst_wack) + ); + + ace_mux_xack #( + .N (NoSlvPorts), + .MAX_OUTSTANDING(MaxRTrans), + .FALL_THROUGH (!RegAck) + ) u_rack_gen ( + .clk_i, + .rst_ni, + .empty_o ( /* unused */), + .full_o (mst_r_stall), + .handshake_i(r_last_handshake), + .idx_i (switch_r_id), + .acks_i (slv_racks), + .ack_o (mst_rack) + ); + end else if (!RegAck) begin : gen_xack_assign + assign mst_wack = slv_wacks[0]; + assign
mst_rack = slv_racks[0]; + assign mst_b_stall = 1'b0; + assign mst_r_stall = 1'b0; + end else begin : gen_xack_ffs + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) {mst_wack, mst_rack} <= 2'b00; // clear the registered xACKs on asynchronous reset + else {mst_wack, mst_rack} <= {slv_wacks[0], slv_racks[0]}; + end + assign mst_b_stall = 1'b0; + assign mst_r_stall = 1'b0; + end + // }}} + + `undef __ACE_NO_ACKS + +endmodule + +// interface wrap +module ace_mux_intf #( + parameter int unsigned SLV_AXI_ID_WIDTH = 32'd0, // Synopsys DC requires default value for params + parameter int unsigned MST_AXI_ID_WIDTH = 32'd0, + parameter int unsigned AXI_ADDR_WIDTH = 32'd0, + parameter int unsigned AXI_DATA_WIDTH = 32'd0, + parameter int unsigned AXI_USER_WIDTH = 32'd0, + parameter int unsigned NO_SLV_PORTS = 32'd0, // Number of slave ports + // Maximum number of outstanding transactions per write + parameter int unsigned MAX_W_TRANS = 32'd8, + // Maximum number of outstanding transactions per B channel (ACE) + parameter int unsigned MAX_B_TRANS = 32'd8, + // Maximum number of outstanding transactions per R channel (ACE) + parameter int unsigned MAX_R_TRANS = 32'd8, + // if enabled, this multiplexer is purely combinatorial + parameter bit FALL_THROUGH = 1'b0, + // add spill register on write master ports, adds a cycle latency on write channels + parameter bit SPILL_AW = 1'b1, + parameter bit SPILL_W = 1'b0, + parameter bit SPILL_B = 1'b0, + // add spill register on read master ports, adds a cycle latency on read channels + parameter bit SPILL_AR = 1'b1, + parameter bit SPILL_R = 1'b0, + // add registers on xACK ports, add a cycle latency on acknowledgment signals (ACE) + parameter bit REG_ACK = 1'b0 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_i, // Testmode enable + ACE_BUS.Slave slv [NO_SLV_PORTS-1:0], // slave ports + ACE_BUS.Master mst // master port +); + + typedef logic [SLV_AXI_ID_WIDTH-1:0] slv_id_t; + typedef logic [MST_AXI_ID_WIDTH-1:0] mst_id_t; + typedef logic [AXI_ADDR_WIDTH -1:0]
addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; + // channels typedef + `ACE_TYPEDEF_AW_CHAN_T(slv_aw_chan_t, addr_t, slv_id_t, user_t) + `ACE_TYPEDEF_AW_CHAN_T(mst_aw_chan_t, addr_t, mst_id_t, user_t) + + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + + `AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, slv_id_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, mst_id_t, user_t) + + `ACE_TYPEDEF_AR_CHAN_T(slv_ar_chan_t, addr_t, slv_id_t, user_t) + `ACE_TYPEDEF_AR_CHAN_T(mst_ar_chan_t, addr_t, mst_id_t, user_t) + + `ACE_TYPEDEF_R_CHAN_T(slv_r_chan_t, data_t, slv_id_t, user_t) + `ACE_TYPEDEF_R_CHAN_T(mst_r_chan_t, data_t, mst_id_t, user_t) + + `ACE_TYPEDEF_REQ_T(slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t) + `ACE_TYPEDEF_RESP_T(slv_resp_t, slv_b_chan_t, slv_r_chan_t) + + `ACE_TYPEDEF_REQ_T(mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t) + `ACE_TYPEDEF_RESP_T(mst_resp_t, mst_b_chan_t, mst_r_chan_t) + + slv_req_t [NO_SLV_PORTS-1:0] slv_reqs; + slv_resp_t [NO_SLV_PORTS-1:0] slv_resps; + mst_req_t mst_req; + mst_resp_t mst_resp; + + for (genvar i = 0; i < NO_SLV_PORTS; i++) begin : gen_assign_slv_ports + `ACE_ASSIGN_TO_REQ(slv_reqs[i], slv[i]) + `ACE_ASSIGN_FROM_RESP(slv[i], slv_resps[i]) + end + + `ACE_ASSIGN_FROM_REQ(mst, mst_req) + `ACE_ASSIGN_TO_RESP(mst_resp, mst) + + ace_mux #( + .SlvAxiIDWidth(SLV_AXI_ID_WIDTH), + .slv_aw_chan_t(slv_aw_chan_t), // AW Channel Type, slave ports + .mst_aw_chan_t(mst_aw_chan_t), // AW Channel Type, master port + .w_chan_t (w_chan_t), // W Channel Type, all ports + .slv_b_chan_t (slv_b_chan_t), // B Channel Type, slave ports + .mst_b_chan_t (mst_b_chan_t), // B Channel Type, master port + .slv_ar_chan_t(slv_ar_chan_t), // AR Channel Type, slave ports + .mst_ar_chan_t(mst_ar_chan_t), // AR Channel Type, master port + .slv_r_chan_t (slv_r_chan_t), // R Channel Type, slave ports + .mst_r_chan_t (mst_r_chan_t), // R Channel Type, master port 
+ .slv_req_t (slv_req_t), + .slv_resp_t (slv_resp_t), + .mst_req_t (mst_req_t), + .mst_resp_t (mst_resp_t), + .NoSlvPorts (NO_SLV_PORTS), // Number of slave ports + .MaxWTrans (MAX_W_TRANS), + .MaxBTrans (MAX_B_TRANS), + .MaxRTrans (MAX_R_TRANS), + .FallThrough (FALL_THROUGH), + .SpillAw (SPILL_AW), + .SpillW (SPILL_W), + .SpillB (SPILL_B), + .SpillAr (SPILL_AR), + .SpillR (SPILL_R), + .RegAck (REG_ACK) + ) i_ace_mux ( + .clk_i (clk_i), // Clock + .rst_ni (rst_ni), // Asynchronous reset active low + .test_i (test_i), // Test Mode enable + .slv_reqs_i (slv_reqs), + .slv_resps_o(slv_resps), + .mst_req_o (mst_req), + .mst_resp_i (mst_resp) + ); +endmodule + +module ace_mux_xack +// Parameters +// {{{ +#( + parameter int unsigned N = 0, + parameter int unsigned MAX_OUTSTANDING = 4, + parameter bit FALL_THROUGH = 0, + localparam int unsigned IDX_WIDTH = N > 1 ? $clog2(N) : 1, + localparam type idx_t = logic [IDX_WIDTH-1:0] +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + output logic empty_o, + output logic full_o, + + input logic handshake_i, + input idx_t idx_i, + + input logic [N-1:0] acks_i, + + output logic ack_o +); + // }}} + + // Internal signals + // {{{ + idx_t sel; + logic [N-1:0] sel_bv; + logic [N-1:0] ack_gnts; + logic [N-1:0] ack_reqs; + // }}} + + // Track response ordering with a FIFO + // {{{ + fifo_v3 #( + .FALL_THROUGH(1'b0), + .DEPTH (MAX_OUTSTANDING), + .dtype (idx_t) + ) i_sel_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (full_o), + .empty_o (empty_o), + .usage_o (), + .data_i (idx_i), + .push_i (handshake_i), + .data_o (sel), + .pop_i (ack_o) + ); + // }}} + + // Per-slv port credit counter + // An out-of-order xACK signal increases + // the counter by 1 + // When the FIFO selects the counter, + // it is decreased by 1 + // {{{ + for (genvar i = 0; i < N; i++) begin : gen_credit_counters + credit_counter #( + .NumCredits (MAX_OUTSTANDING), + .InitCreditEmpty(1'b1) + ) 
u_credit_counter ( + .clk_i, + .rst_ni, + .credit_o ( /* unused */), + .credit_give_i(acks_i[i]), + .credit_take_i(ack_gnts[i]), + .credit_init_i('0), + .credit_left_o(ack_reqs[i]), + .credit_crit_o( /* unused */), + .credit_full_o( /* unused */) + ); + end + // }}} + + // xACK generation + // {{{ + assign sel_bv = N'(1) << sel; + // In FALL_THROUGH mode, a concurrent increase and decrease of the counter + // does not alter its value and the xACK signal is combinationally generated + assign ack_gnts = sel_bv & (ack_reqs | (FALL_THROUGH ? acks_i : '0)); + assign ack_o = |ack_gnts; + // }}} + +endmodule From 50a0c490360b2cf9bbcf424f4e19e856e2ec4aa0 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 1 Aug 2025 18:58:36 +0200 Subject: [PATCH 069/109] ccu: use ACE mux and demux modules --- src/ccu/ace_ccu_frontend.sv | 82 +++++------------------------ src/ccu/ace_ccu_top.sv | 10 ++-- src/ccu/ace_ccu_tracker.sv | 102 +++++++++++++++--------------------- 3 files changed, 60 insertions(+), 134 deletions(-) diff --git a/src/ccu/ace_ccu_frontend.sv b/src/ccu/ace_ccu_frontend.sv index 76f663a..d0ee18a 100644 --- a/src/ccu/ace_ccu_frontend.sv +++ b/src/ccu/ace_ccu_frontend.sv @@ -39,10 +39,7 @@ module ace_ccu_frontend output midend_req_t ccu_nonshareable_req_o, input midend_resp_t ccu_nonshareable_resp_i, output midend_req_t ccu_shareable_req_o, - input midend_resp_t ccu_shareable_resp_i, - - output slv_bv_t ccu_shareable_rack_o, - output slv_bv_t ccu_shareable_wack_o + input midend_resp_t ccu_shareable_resp_i ); // Internal signals @@ -54,11 +51,6 @@ module ace_ccu_frontend slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_nonshareable_resp; slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_shareable_req; slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_shareable_resp; - - slv_bv_t slv_r_nonshareable; - slv_bv_t slv_b_nonshareable; - slv_bv_t slv_rack_nonshareable; - slv_bv_t slv_wack_nonshareable; // }}} // Slv demuxes @@ -105,11 +97,14 @@ module ace_ccu_frontend slv_req_cut[i].ar.bar[0], 
slv_req_cut[i].ar.domain, slv_req_cut[i].ar.snoop ); - axi_demux_simple #( + ace_demux_simple #( .AxiIdWidth (CcuCfg.u.AxiSlvIdWidth), .AtopSupport(1'b1), - .axi_req_t (slv_req_t), - .axi_resp_t (slv_resp_t), + .aw_chan_t (slv_aw_t), + .w_chan_t (w_t), + .ar_chan_t (slv_ar_t), + .req_t (slv_req_t), + .resp_t (slv_resp_t), .NoMstPorts (2), .MaxTrans (CcuCfg.u.MaxTransactions), .AxiLookBits(CcuCfg.u.AxiIdLookupBits), @@ -123,16 +118,14 @@ module ace_ccu_frontend .slv_aw_select_i(aw_is_nonblocking), .slv_ar_select_i(ar_is_read_no_snoop), .mst_reqs_o ({slv_nonshareable_req[i], slv_shareable_req[i]}), - .mst_resps_i ({slv_nonshareable_resp[i], slv_shareable_resp[i]}), - .mst_b_idx_o (slv_b_nonshareable[i]), - .mst_r_idx_o (slv_r_nonshareable[i]) + .mst_resps_i ({slv_nonshareable_resp[i], slv_shareable_resp[i]}) ); end // }}} // Nonshareable mux // {{{ - axi_mux #( + ace_mux #( .SlvAxiIDWidth(CcuCfg.u.AxiSlvIdWidth), .slv_aw_chan_t(slv_aw_t), .mst_aw_chan_t(midend_aw_t), @@ -149,6 +142,8 @@ module ace_ccu_frontend .mst_resp_t (midend_resp_t), .NoSlvPorts (CcuCfg.u.SlvPorts), .MaxWTrans (32'd8), + .MaxBTrans (CcuCfg.u.MaxTransactions), + .MaxRTrans (CcuCfg.u.MaxTransactions), .FallThrough (1'b1), .SpillAw (1'b0), .SpillW (1'b0), @@ -168,7 +163,7 @@ module ace_ccu_frontend // Nonshareable demux // {{{ - axi_mux #( + ace_mux #( .SlvAxiIDWidth(CcuCfg.u.AxiSlvIdWidth), .slv_aw_chan_t(slv_aw_t), .mst_aw_chan_t(midend_aw_t), @@ -185,6 +180,8 @@ module ace_ccu_frontend .mst_resp_t (midend_resp_t), .NoSlvPorts (CcuCfg.u.SlvPorts), .MaxWTrans (32'd8), + .MaxBTrans (CcuCfg.u.MaxTransactions), + .MaxRTrans (CcuCfg.u.MaxTransactions), .FallThrough (1'b1), .SpillAw (1'b0), .SpillW (1'b0), @@ -202,55 +199,4 @@ module ace_ccu_frontend ); // }}} - // Sharebale xacks generation - // {{{ - for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin : gen_xack_fifos - logic r_push, b_push; - - assign r_push = slv_resp_cut[i].r_valid && slv_req_cut[i].r_ready && slv_resp_cut[i].r.last; - assign 
b_push = slv_resp_cut[i].b_valid && slv_req_cut[i].b_ready; - - fifo_v3 #( - .FALL_THROUGH(1'b0), - .DATA_WIDTH (1), - .DEPTH (CcuCfg.u.MaxTransactions) - ) u_r_tid_fifo ( - .clk_i, - .rst_ni, - .flush_i (1'b0), - .testmode_i(1'b0), - .full_o (), - .empty_o (), - .usage_o (), - .data_i (slv_r_nonshareable[i]), - .push_i (r_push), - .data_o (slv_rack_nonshareable[i]), - .pop_i (slv_req_cut[i].rack) - ); - - fifo_v3 #( - .FALL_THROUGH(1'b0), - .DATA_WIDTH (1), - .DEPTH (CcuCfg.u.MaxTransactions) - ) u_b_tid_fifo ( - .clk_i, - .rst_ni, - .flush_i (1'b0), - .testmode_i(1'b0), - .full_o (), - .empty_o (), - .usage_o (), - .data_i (slv_b_nonshareable[i]), - .push_i (b_push), - .data_o (slv_wack_nonshareable[i]), - .pop_i (slv_req_cut[i].wack) - ); - end - - for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin : gen_shareable_xacks - assign ccu_shareable_rack_o[i] = slv_req_i[i].rack && !slv_rack_nonshareable[i]; - assign ccu_shareable_wack_o[i] = slv_req_i[i].wack && !slv_wack_nonshareable[i]; - end - // }}} - endmodule diff --git a/src/ccu/ace_ccu_top.sv b/src/ccu/ace_ccu_top.sv index cea7a1d..4870cea 100644 --- a/src/ccu/ace_ccu_top.sv +++ b/src/ccu/ace_ccu_top.sv @@ -113,8 +113,6 @@ module ace_ccu_top midend_resp_t ccu_nonshareable_resp; midend_req_t ccu_shareable_req; midend_resp_t ccu_shareable_resp; - slv_bv_t ccu_shareable_rack; - slv_bv_t ccu_shareable_wack; midend_ar_t replay_ar; logic replay_ar_valid; @@ -221,9 +219,7 @@ module ace_ccu_top .ccu_nonshareable_req_o (ccu_nonshareable_req), .ccu_nonshareable_resp_i(ccu_nonshareable_resp), .ccu_shareable_req_o (ccu_shareable_req), - .ccu_shareable_resp_i (ccu_shareable_resp), - .ccu_shareable_rack_o (ccu_shareable_rack), - .ccu_shareable_wack_o (ccu_shareable_wack) + .ccu_shareable_resp_i (ccu_shareable_resp) ); // }}} @@ -380,8 +376,8 @@ module ace_ccu_top .alloc_nline_i (tracker_alloc_nline), .alloc_id_i (tracker_alloc_id), .alloc_tid_o (tracker_alloc_tid), - .dealloc_rack_i (ccu_shareable_rack), - .dealloc_wack_i 
(ccu_shareable_wack), + .dealloc_rack_i (ccu_shareable_req.rack), + .dealloc_wack_i (ccu_shareable_req.wack), .dealloc_r_resp_i (tracker_dealloc_r_resp), .dealloc_r_resp_id_i (tracker_dealloc_r_resp_id), .dealloc_b_resp_i (tracker_dealloc_b_resp), diff --git a/src/ccu/ace_ccu_tracker.sv b/src/ccu/ace_ccu_tracker.sv index d5db5af..468b8fe 100644 --- a/src/ccu/ace_ccu_tracker.sv +++ b/src/ccu/ace_ccu_tracker.sv @@ -40,8 +40,8 @@ module ace_ccu_tracker // Lookup/dealloc interface // {{{ - input slv_bv_t dealloc_rack_i, - input slv_bv_t dealloc_wack_i, + input logic dealloc_rack_i, + input logic dealloc_wack_i, input logic dealloc_r_resp_i, input midend_id_t dealloc_r_resp_id_i, input logic dealloc_b_resp_i, @@ -89,8 +89,8 @@ module ace_ccu_tracker tid_t rack_queue_wdata; tid_t wack_queue_wdata; - tid_t [ CcuCfg.u.SlvPorts-1:0] rack_queue_rdata; - tid_t [ CcuCfg.u.SlvPorts-1:0] wack_queue_rdata; + tid_t rack_queue_rdata; + tid_t wack_queue_rdata; // }}} // Alloc logic @@ -114,19 +114,12 @@ module ace_ccu_tracker // Dealloc logic // {{{ - - // Deallocation logic has some complexity due to the need of handling the rack and wack signals - // from all master, which cannot be stalled and can arrive in parallel in the same cycle - // TODO: can this be simplified? 
for (genvar i = 0; i < CcuCfg.u.MaxTransactions; i++) begin : gen_dealloc - slv_idx_t dealloc_slv_id; - assign dealloc_slv_id = data_q[i].id[CcuCfg.AxiMidendIdWidth-1 : CcuCfg.u.AxiSlvIdWidth]; - assign meta_clr[i].r = dealloc_rack_i[dealloc_slv_id] && (i == rack_queue_rdata[dealloc_slv_id]); - assign meta_clr[i].b = dealloc_wack_i[dealloc_slv_id] && (i == wack_queue_rdata[dealloc_slv_id]); - assign valid_clr[i] = ~|meta_d[i]; + assign meta_clr[i].r = dealloc_rack_i && (i == rack_queue_rdata); + assign meta_clr[i].b = dealloc_wack_i && (i == wack_queue_rdata); + assign valid_clr[i] = ~|meta_d[i]; end - always_comb begin : xack_queue_wdata_mux rack_queue_wdata = '0; wack_queue_wdata = '0; @@ -139,51 +132,42 @@ module ace_ccu_tracker end end - for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin : gen_xack_queues - logic wack_queue_push; - logic rack_queue_push; - - // Push an entry ID to the wack/rack queues if the dealloc response matches the ID of the transaction - // that is being deallocated - assign wack_queue_push = dealloc_b_resp_i && data_q[wack_queue_wdata].id[CcuCfg.AxiMidendIdWidth-1 : CcuCfg.u.AxiSlvIdWidth] == CcuCfg.SlvPortIdxWidth'(i); - assign rack_queue_push = dealloc_r_resp_i && data_q[rack_queue_wdata].id[CcuCfg.AxiMidendIdWidth-1 : CcuCfg.u.AxiSlvIdWidth] == CcuCfg.SlvPortIdxWidth'(i); - - fifo_v3 #( - .FALL_THROUGH(1'b0), - .DEPTH (CcuCfg.u.MaxTransactions), - .dtype (tid_t) - ) u_tracker_wack_queue ( - .clk_i, - .rst_ni, - .flush_i (1'b0), - .testmode_i(1'b0), - .full_o (), - .empty_o (), - .usage_o (), - .data_i (wack_queue_wdata), - .push_i (wack_queue_push), - .data_o (wack_queue_rdata[i]), - .pop_i (dealloc_wack_i[i]) - ); - - fifo_v3 #( - .FALL_THROUGH(1'b0), - .DEPTH (CcuCfg.u.MaxTransactions), - .dtype (tid_t) - ) u_tracker_rack_queue ( - .clk_i, - .rst_ni, - .flush_i (1'b0), - .testmode_i(1'b0), - .full_o (), - .empty_o (), - .usage_o (), - .data_i (rack_queue_wdata), - .push_i (rack_queue_push), - .data_o (rack_queue_rdata[i]), - 
.pop_i (dealloc_rack_i[i]) - ); - end + // Push an entry ID to the wack/rack queues once a response handshake happens + fifo_v3 #( + .FALL_THROUGH(1'b0), + .DEPTH (CcuCfg.u.MaxTransactions), + .dtype (tid_t) + ) u_tracker_wack_queue ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (), + .empty_o (), + .usage_o (), + .data_i (wack_queue_wdata), + .push_i (dealloc_b_resp_i), + .data_o (wack_queue_rdata), + .pop_i (dealloc_wack_i) + ); + + fifo_v3 #( + .FALL_THROUGH(1'b0), + .DEPTH (CcuCfg.u.MaxTransactions), + .dtype (tid_t) + ) u_tracker_rack_queue ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (), + .empty_o (), + .usage_o (), + .data_i (rack_queue_wdata), + .push_i (dealloc_r_resp_i), + .data_o (rack_queue_rdata), + .pop_i (dealloc_rack_i) + ); // }}} // State holding elements From d298dfe1ee1db37b84202bf642c7b3b1b9766252 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Tue, 2 Sep 2025 12:07:56 +0200 Subject: [PATCH 070/109] Bender.yml: bump `axi` --- Bender.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Bender.yml b/Bender.yml index 9e755a2..549a50b 100644 --- a/Bender.yml +++ b/Bender.yml @@ -5,7 +5,7 @@ package: - "Riccardo Tedeschi " dependencies: - axi: { git: "https://github.com/pulp-platform/axi.git", rev: 37fa3a93 } # branch: rt/ace + axi: { git: "https://github.com/pulp-platform/axi.git", rev: e4199992 } # branch: multicore/devel common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.38.0 } axi_riscv_atomics: { git: "https://github.com/pulp-platform/axi_riscv_atomics.git", version: 0.8.2} From c5db5fb51df70aab3a89b0740b61af6efa08d8f9 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Tue, 2 Sep 2025 12:09:14 +0200 Subject: [PATCH 071/109] ccu_frontend: integrate LR/SC monitor --- Bender.yml | 2 +- src/ccu/ace_ccu_frontend.sv | 77 +++++++++++++++++++++++++++++++++---- src/ccu/ace_ccu_pkg.sv | 11 ++++++ 3 files changed, 81 insertions(+), 9 
deletions(-) diff --git a/Bender.yml b/Bender.yml index 549a50b..01901a0 100644 --- a/Bender.yml +++ b/Bender.yml @@ -7,7 +7,7 @@ package: dependencies: axi: { git: "https://github.com/pulp-platform/axi.git", rev: e4199992 } # branch: multicore/devel common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.38.0 } - axi_riscv_atomics: { git: "https://github.com/pulp-platform/axi_riscv_atomics.git", version: 0.8.2} + axi_riscv_atomics: { git: "https://github.com/pulp-platform/axi_riscv_atomics.git", rev: 7a7bf983} # branch: multicore/devel export_include_dirs: - include diff --git a/src/ccu/ace_ccu_frontend.sv b/src/ccu/ace_ccu_frontend.sv index d0ee18a..ceaf20f 100644 --- a/src/ccu/ace_ccu_frontend.sv +++ b/src/ccu/ace_ccu_frontend.sv @@ -9,6 +9,9 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +`include "ace/typedef.svh" +`include "ace/assign.svh" + module ace_ccu_frontend import ace_pkg::*; import ace_ccu_pkg::*; @@ -44,13 +47,16 @@ module ace_ccu_frontend // Internal signals // {{{ - slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_req_cut; - slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_resp_cut; + slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_req_cut; + slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_resp_cut; + + slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_nonshareable_req; + slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_nonshareable_resp; + slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_shareable_req; + slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_shareable_resp; - slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_nonshareable_req; - slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_nonshareable_resp; - slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_shareable_req; - slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_shareable_resp; + midend_req_t mux_shareable_req; + midend_resp_t mux_shareable_resp; // }}} // Slv demuxes @@ -194,8 +200,63 @@ module ace_ccu_frontend .test_i (1'b0), .slv_reqs_i (slv_shareable_req), 
.slv_resps_o(slv_shareable_resp), - .mst_req_o (ccu_shareable_req_o), - .mst_resp_i (ccu_shareable_resp_i) + .mst_req_o (mux_shareable_req), + .mst_resp_i (mux_shareable_resp) + ); + // }}} + + // AMO LR/SC monitor + // {{{ + + localparam longint unsigned ADDR_BEGIN = '0; + localparam longint unsigned ADDR_END = {CcuCfg.u.AxiAddrWidth{1'b1}}; + + // All subsequent ACE defines will not use RACK/WACKS + `define __ACE_NO_ACKS + + // Internal request type without acks + `ACE_TYPEDEF_REQ_T(__midend_req_t, midend_aw_t, w_t, midend_ar_t) + + __midend_req_t __mux_shareable_req; + __midend_req_t __ccu_shareable_req; + + `ACE_ASSIGN_REQ_STRUCT(__mux_shareable_req, mux_shareable_req) + `ACE_ASSIGN_REQ_STRUCT(ccu_shareable_req_o, __ccu_shareable_req) + + // xACK bypass + assign ccu_shareable_req_o.wack = mux_shareable_req.wack; + assign ccu_shareable_req_o.rack = mux_shareable_req.rack; + + `undef __ACE_NO_ACKS + + axi_riscv_lrsc_structs #( + .ADDR_BEGIN (ADDR_BEGIN), + .ADDR_END (ADDR_END), + .AXI_ADDR_WIDTH (CcuCfg.u.AxiAddrWidth), + .AXI_DATA_WIDTH (CcuCfg.u.AxiDataWidth), + .AXI_ID_WIDTH (CcuCfg.AxiMidendIdWidth), + .AXI_USER_WIDTH (CcuCfg.u.AxiUserWidth), + .AXI_MAX_READ_TXNS (CcuCfg.u.MaxTransactions), + .AXI_MAX_WRITE_TXNS (CcuCfg.u.MaxTransactions), + .AXI_USER_AS_ID (CcuCfg.u.AmoAxiUserAsId), + .AXI_USER_ID_MSB (CcuCfg.u.AmoAxiUserIdMsb), + .AXI_USER_ID_LSB (CcuCfg.u.AmoAxiUserIdLsb), + .AXI_ADDR_LSB (CcuCfg.u.AmoAxiAddrLsb), + .FULL_BANDWIDTH (1), + .CUT_OUP_POP_INP_GNT(0), + .NUM_RESERVATIONS (CcuCfg.u.AmoNumReservations), + .aw_chan_t (midend_aw_t), + .b_chan_t (midend_b_t), + .r_chan_t (midend_r_t), + .req_t (__midend_req_t), + .resp_t (midend_resp_t) + ) u_axi_riscv_lrsc ( + .clk_i, + .rst_ni, + .slv_req_i (__mux_shareable_req), + .slv_resp_o(mux_shareable_resp), + .mst_req_o (__ccu_shareable_req), + .mst_resp_i(ccu_shareable_resp_i) ); // }}} diff --git a/src/ccu/ace_ccu_pkg.sv b/src/ccu/ace_ccu_pkg.sv index ba376e7..c9da615 100644 --- a/src/ccu/ace_ccu_pkg.sv 
+++ b/src/ccu/ace_ccu_pkg.sv @@ -40,6 +40,17 @@ package ace_ccu_pkg; bit CutMstResp; bit CutSnoopReq; bit CutSnoopResp; + // LR/SC reservation buffer + // Use the AXI User signal instead of the AXI ID to track reservations + bit AmoAxiUserAsId; + // MSB of the ID in the user signal + int unsigned AmoAxiUserIdMsb; + // LSB of the ID in the user signal + int unsigned AmoAxiUserIdLsb; + // log2 of granularity for reservations (ignored LSBs) + int unsigned AmoAxiAddrLsb; + // Number of simultaineous reservations + int unsigned AmoNumReservations; } ace_ccu_user_cfg_t; typedef struct packed { From bd99a72e7edd164b0edc4e7bae3b865ef793336a Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Tue, 2 Sep 2025 12:12:49 +0200 Subject: [PATCH 072/109] ccu_read: ensure per-master burst locking for R responses --- src/ccu/ace_ccu_read.sv | 55 ++++++++++++++++++++++++++++++++++++++--- src/ccu/ace_ccu_top.sv | 3 ++- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/ccu/ace_ccu_read.sv b/src/ccu/ace_ccu_read.sv index 46aa8f7..d211208 100644 --- a/src/ccu/ace_ccu_read.sv +++ b/src/ccu/ace_ccu_read.sv @@ -21,7 +21,8 @@ module ace_ccu_read parameter type tid_t = logic, parameter type backend_ar_t = logic, parameter type backend_r_t = logic, - parameter type midend_r_t = logic + parameter type midend_r_t = logic, + parameter type slv_idx_t = logic ) ( input logic clk_i, input logic rst_ni, @@ -76,6 +77,15 @@ module ace_ccu_read // {{{ `AXI_TO_ACE_ASSIGN_R_STRUCT(mem_r, r_i) + typedef struct packed { + logic mem; + logic cd; + } req_mask_t; + + req_mask_t [CcuCfg.u.SlvPorts-1:0] req_mask_q; + req_mask_t [CcuCfg.u.SlvPorts-1:0] req_mask_d; + req_mask_t req_mask; + rr_arb_tree #( .NumIn (2), .DataType (midend_r_t), @@ -86,14 +96,53 @@ module ace_ccu_read .rst_ni, .flush_i(1'b0), .rr_i (1'b0), - .req_i ({r_valid_i, cd_r_valid_i}), + .req_i ({r_valid_i, cd_r_valid_i} & ~req_mask), .gnt_o ({r_ready_o, cd_r_ready_o}), .data_i ({mem_r, cd_r_i}), .req_o (r_valid_o), .gnt_i 
(r_ready_i), .data_o (r_o), - .idx_o () + .idx_o (r_arb) ); + + assign req_mask.mem = req_mask_q[mem_r.id>>CcuCfg.u.AxiSlvIdWidth].mem; + assign req_mask.cd = req_mask_q[cd_r_i.id>>CcuCfg.u.AxiSlvIdWidth].cd; + + for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin + slv_idx_t slv_idx; + logic read_mem_q; + logic read_mem_d; + logic read_busy_q; + logic read_busy_d; + + assign slv_idx = r_o.id >> CcuCfg.u.AxiSlvIdWidth; + + always_comb begin + read_busy_d = read_busy_q; + req_mask_d[i] = req_mask_q[i]; + + if (read_busy_q) begin + if (r_valid_o && r_ready_i && r_o.last) begin + read_busy_d = 1'b0; + req_mask_d[i] = '0; + end + end else if (slv_idx == i && r_valid_o && r_ready_i && !r_o.last) begin + read_busy_d = 1'b1; + req_mask_d[i].mem = ~r_arb; + req_mask_d[i].cd = r_arb; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + read_busy_q <= 1'b0; + req_mask_q[i] <= '0; + end else begin + read_busy_q <= read_busy_d; + req_mask_q[i] <= req_mask_d[i]; + end + end + end // }}} diff --git a/src/ccu/ace_ccu_top.sv b/src/ccu/ace_ccu_top.sv index 4870cea..8d09bb5 100644 --- a/src/ccu/ace_ccu_top.sv +++ b/src/ccu/ace_ccu_top.sv @@ -451,7 +451,8 @@ module ace_ccu_top .tid_t (tid_t), .backend_ar_t(backend_ar_t), .backend_r_t (backend_r_t), - .midend_r_t (midend_r_t) + .midend_r_t (midend_r_t), + .slv_idx_t (slv_idx_t) ) u_ace_ccu_read_unit ( .clk_i, .rst_ni, From e52738b033b1829e6040ba097eac2e396d1146ca Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 10 Dec 2025 12:45:02 +0100 Subject: [PATCH 073/109] treewide: CCU refactoring and repository cleanup --- Bender.yml | 60 +- include/ace/assign.svh | 21 +- include/ace/domain.svh | 26 +- include/ace/typedef.svh | 10 +- src/ace_cut.sv | 250 --- src/ace_demux_simple.sv | 160 -- src/ace_mux.sv | 424 ----- src/ace_pkg.sv | 14 +- src/ace_snoop_cut.sv | 160 -- src/ccu/ace_ccu_ax_arbiter.sv | 175 -- src/ccu/ace_ccu_cd_arbiter.sv | 145 -- src/ccu/ace_ccu_cd_ctrl.sv | 262 --- 
src/ccu/ace_ccu_frontend.sv | 263 --- src/ccu/ace_ccu_pkg.sv | 98 - src/ccu/ace_ccu_read.sv | 150 -- src/ccu/ace_ccu_snoop_pipe.sv | 351 ---- src/ccu/ace_ccu_top.sv | 710 -------- src/ccu/ace_ccu_tracker.sv | 223 --- src/ccu/ace_ccu_write.sv | 261 --- src/ccu/ccu_exclusive_monitor.sv | 229 +++ src/ccu/ccu_frontend.sv | 268 +++ src/ccu/ccu_pkg.sv | 83 + src/ccu/ccu_read_engine.sv | 108 ++ src/ccu/ccu_replay.sv | 33 + src/ccu/ccu_scoreboard.sv | 118 ++ src/ccu/ccu_snoop_pipeline.sv | 608 +++++++ src/ccu/ccu_top.sv | 363 ++++ src/ccu/ccu_write_engine.sv | 210 +++ src/ccu/deprecated/ccu_ctrl.sv | 573 ------ src/ccu/deprecated/ccu_ctrl_decoder.sv | 603 ------- src/ccu/deprecated/ccu_ctrl_memory_unit.sv | 386 ---- src/ccu/deprecated/ccu_ctrl_pkg.sv | 19 - src/ccu/deprecated/ccu_ctrl_snoop_unit.sv | 185 -- src/deprecated/ace_sim_master.sv | 1543 ---------------- src/deprecated/ace_test.sv | 1873 -------------------- src/deprecated/snoop_test.sv | 705 -------- 36 files changed, 2065 insertions(+), 9605 deletions(-) delete mode 100644 src/ace_cut.sv delete mode 100644 src/ace_demux_simple.sv delete mode 100644 src/ace_mux.sv delete mode 100644 src/ace_snoop_cut.sv delete mode 100644 src/ccu/ace_ccu_ax_arbiter.sv delete mode 100644 src/ccu/ace_ccu_cd_arbiter.sv delete mode 100644 src/ccu/ace_ccu_cd_ctrl.sv delete mode 100644 src/ccu/ace_ccu_frontend.sv delete mode 100644 src/ccu/ace_ccu_pkg.sv delete mode 100644 src/ccu/ace_ccu_read.sv delete mode 100644 src/ccu/ace_ccu_snoop_pipe.sv delete mode 100644 src/ccu/ace_ccu_top.sv delete mode 100644 src/ccu/ace_ccu_tracker.sv delete mode 100644 src/ccu/ace_ccu_write.sv create mode 100644 src/ccu/ccu_exclusive_monitor.sv create mode 100644 src/ccu/ccu_frontend.sv create mode 100644 src/ccu/ccu_pkg.sv create mode 100644 src/ccu/ccu_read_engine.sv create mode 100644 src/ccu/ccu_replay.sv create mode 100644 src/ccu/ccu_scoreboard.sv create mode 100644 src/ccu/ccu_snoop_pipeline.sv create mode 100644 src/ccu/ccu_top.sv create mode 
100644 src/ccu/ccu_write_engine.sv delete mode 100644 src/ccu/deprecated/ccu_ctrl.sv delete mode 100644 src/ccu/deprecated/ccu_ctrl_decoder.sv delete mode 100644 src/ccu/deprecated/ccu_ctrl_memory_unit.sv delete mode 100644 src/ccu/deprecated/ccu_ctrl_pkg.sv delete mode 100644 src/ccu/deprecated/ccu_ctrl_snoop_unit.sv delete mode 100644 src/deprecated/ace_sim_master.sv delete mode 100644 src/deprecated/ace_test.sv delete mode 100644 src/deprecated/snoop_test.sv diff --git a/Bender.yml b/Bender.yml index 01901a0..4bff8bb 100644 --- a/Bender.yml +++ b/Bender.yml @@ -1,44 +1,34 @@ package: name: ace + # Authors in alphabetical order (surname) authors: - "Aleksi Korsman " - "Riccardo Tedeschi " dependencies: - axi: { git: "https://github.com/pulp-platform/axi.git", rev: e4199992 } # branch: multicore/devel - common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.38.0 } - axi_riscv_atomics: { git: "https://github.com/pulp-platform/axi_riscv_atomics.git", rev: 7a7bf983} # branch: multicore/devel + axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.9 } + common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.39.0 } + axi_riscv_atomics: { git: "https://github.com/pulp-platform/axi_riscv_atomics.git", rev: 6d3c8b4} # branch: master export_include_dirs: - include sources: - # Source files grouped in levels. Files in level 0 have no dependencies on files in this - # package. Files in level 1 only depend on files in level 0, files in level 2 on files in - # levels 1 and 0, etc. Files within a level are ordered alphabetically. 
- # Level 0 + # Generic ACE package and interfaces - src/ace_pkg.sv - # Level 1 - src/ace_intf.sv - src/snoop_intf.sv - # ACE ips - - src/ace_cut.sv - - src/ace_snoop_cut.sv - - src/ace_demux_simple.sv - - src/ace_mux.sv - # CCU src files - - src/ccu/ace_ccu_pkg.sv - - src/ccu/ace_ccu_ax_arbiter.sv - - src/ccu/ace_ccu_cd_arbiter.sv - - src/ccu/ace_ccu_cd_ctrl.sv - - src/ccu/ace_ccu_frontend.sv - - src/ccu/ace_ccu_read.sv - - src/ccu/ace_ccu_snoop_pipe.sv - - src/ccu/ace_ccu_tracker.sv - - src/ccu/ace_ccu_write.sv - - src/ccu/ace_ccu_top.sv - - + # CCU package + - src/ccu/ccu_pkg.sv + # CCU source files + - src/ccu/ccu_exclusive_monitor.sv + - src/ccu/ccu_frontend.sv + - src/ccu/ccu_read_engine.sv + - src/ccu/ccu_replay.sv + - src/ccu/ccu_scoreboard.sv + - src/ccu/ccu_snoop_pipeline.sv + - src/ccu/ccu_top.sv + - src/ccu/ccu_write_engine.sv #- target: simulation # files: @@ -56,12 +46,12 @@ sources: # files: # - src/ccu/ccu_ctrl_wr_snoop.sv - - target: test - files: - # Level 0 - - test/vip/ace_test_pkg.sv - - test/vip/snoop_test_pkg.sv - # Level 1 - - test/vip/cache_test_pkg.sv - # Level 2 - - test/tb_ace_ccu_top.sv + # - target: test + # files: + # # Level 0 + # - test/vip/ace_test_pkg.sv + # - test/vip/snoop_test_pkg.sv + # # Level 1 + # - test/vip/cache_test_pkg.sv + # # Level 2 + # - test/tb_ace_ccu_top.sv diff --git a/include/ace/assign.svh b/include/ace/assign.svh index e47d02c..f2d6cdc 100644 --- a/include/ace/assign.svh +++ b/include/ace/assign.svh @@ -17,6 +17,7 @@ `define ACE_ASSIGN_SVH_ `include "axi/assign.svh" +`include "ace/assign.svh" //////////////////////////////////////////////////////////////////////////////////////////////////// // Internal implementation for assigning one ACE struct or interface to another struct or interface. 
@@ -71,11 +72,7 @@ __opt_as __lhs.b_ready = __rhs.b_ready; \ `__ACE_TO_AR(__opt_as, __lhs.ar, __lhs_sep, __rhs.ar, __rhs_sep) \ __opt_as __lhs.ar_valid = __rhs.ar_valid; \ - __opt_as __lhs.r_ready = __rhs.r_ready; \ - `ifndef __ACE_NO_ACKS \ - __opt_as __lhs.wack = __rhs.wack; \ - __opt_as __lhs.rack = __rhs.rack; \ - `endif + __opt_as __lhs.r_ready = __rhs.r_ready; `define __ACE_TO_RESP(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ __opt_as __lhs.aw_ready = __rhs.aw_ready; \ __opt_as __lhs.ar_ready = __rhs.ar_ready; \ @@ -118,11 +115,7 @@ `AXI_ASSIGN_W(slv, mst) \ `AXI_ASSIGN_B(mst, slv) \ `ACE_ASSIGN_AR(slv, mst) \ - `ACE_ASSIGN_R(mst, slv) \ - `ifndef __ACE_NO_ACKS \ - assign slv.wack = mst.wack; \ - assign slv.rack = mst.rack; \ - `endif + `ACE_ASSIGN_R(mst, slv) //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -150,11 +143,7 @@ assign mon_dv.ar_ready = axi_if.ar_ready; \ `__ACE_TO_R(assign, mon_dv.r, _, axi_if.r, _) \ assign mon_dv.r_valid = axi_if.r_valid; \ - assign mon_dv.r_ready = axi_if.r_ready; \ - `ifndef __ACE_NO_ACKS \ - assign mon_dv.wack = axi_if.wack; \ - assign mon_dv.rack = axi_if.rack; \ - `endif + assign mon_dv.r_ready = axi_if.r_ready; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -323,7 +312,7 @@ __opt_as __lhs.cd_valid = __rhs.cd_valid; \ `__SNOOP_TO_CD(__opt_as, __lhs.cd, __lhs_sep, __rhs.cd, __rhs_sep) \ __opt_as __lhs.cr_valid = __rhs.cr_valid; \ - __opt_as __lhs.cr_resp = __rhs.cr_resp; + __opt_as __lhs.cr = __rhs.cr; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/include/ace/domain.svh b/include/ace/domain.svh index 0b4772b..9b1ec81 100644 --- a/include/ace/domain.svh +++ b/include/ace/domain.svh @@ -17,28 +17,14 @@ // Domain types // ////////////////// -`define DOMAIN_BV_T(__width) \ - logic [__width-1:0] - -`define DOMAIN_RULE_T(__bv_t) \ +`define 
ACE_DECLARE_DOMAIN_MAP_T(__num_subordinates) \ struct packed { \ - __bv_t initiator; \ - __bv_t inner; \ - __bv_t outer; \ + logic [__num_subordinates-1:0] initiator; \ + logic [__num_subordinates-1:0] inner; \ + logic [__num_subordinates-1:0] outer; \ } -`define DOMAIN_TYPEDEF_BV_T(__width, __bv_t) \ - typedef logic [__width-1:0] __bv_t; - -`define DOMAIN_TYPEDEF_RULE_T(__bv_t, __set_t) \ - typedef struct packed { \ - __bv_t initiator; \ - __bv_t inner; \ - __bv_t outer; \ - } __set_t; - -`define DOMAIN_TYPEDEF_ALL(__width, __bv_t, __set_t) \ - `DOMAIN_TYPEDEF_BV_T(__width, __bv_t) \ - `DOMAIN_TYPEDEF_RULE_T(__bv_t, __set_t) +`define ACE_TYPEDEF_DOMAIN_TYPEDEF_MAP_T(__num_subordinates, __map_t) \ + typedef `ACE_DECLARE_DOMAIN_MAP_T(__num_subordinates) __map_t; `endif // ACE_DOMAIN_SVH_ diff --git a/include/ace/typedef.svh b/include/ace/typedef.svh index 72e1740..54fffa9 100644 --- a/include/ace/typedef.svh +++ b/include/ace/typedef.svh @@ -81,10 +81,6 @@ ar_chan_t ar; \ logic ar_valid; \ logic r_ready; \ - `ifndef __ACE_NO_ACKS \ - logic wack; \ - logic rack; \ - `endif \ } req_t; `define ACE_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) \ typedef struct packed { \ @@ -153,7 +149,9 @@ logic last; \ } cd_chan_t; `define SNOOP_TYPEDEF_CR_CHAN_T(cr_chan_t) \ - typedef ace_pkg::crresp_t cr_chan_t; + typedef struct packed { \ + ace_pkg::crresp_t resp; \ + } cr_chan_t; `define SNOOP_TYPEDEF_REQ_T(req_t, ac_chan_t) \ typedef struct packed { \ logic ac_valid; \ @@ -167,7 +165,7 @@ logic cd_valid; \ cd_chan_t cd; \ logic cr_valid; \ - cr_chan_t cr_resp; \ + cr_chan_t cr; \ } resp_t; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/ace_cut.sv b/src/ace_cut.sv deleted file mode 100644 index 47050af..0000000 --- a/src/ace_cut.sv +++ /dev/null @@ -1,250 +0,0 @@ -// Copyright (c) 2014-2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, 
Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// -// Authors: -// - Wolfgang Roenninger -// - Fabian Schuiki -// - Andreas Kurth -// - Riccardo Tedeschi - -/// An ACE4 cut. -/// -/// Breaks all combinatorial paths between its input and output. -module ace_cut #( - // bypass enable - parameter bit Bypass = 1'b0, - parameter bit BypassAw = Bypass, - parameter bit BypassW = Bypass, - parameter bit BypassB = Bypass, - parameter bit BypassAr = Bypass, - parameter bit BypassR = Bypass, - parameter bit BypassAck = Bypass, - // ACE channel structs - parameter type aw_chan_t = logic, - parameter type w_chan_t = logic, - parameter type b_chan_t = logic, - parameter type ar_chan_t = logic, - parameter type r_chan_t = logic, - // ACE request & response structs - parameter type ace_req_t = logic, - parameter type ace_resp_t = logic -) ( - input logic clk_i, - input logic rst_ni, - // salve port - input ace_req_t slv_req_i, - output ace_resp_t slv_resp_o, - // master port - output ace_req_t mst_req_o, - input ace_resp_t mst_resp_i -); - - // a spill register for each channel - spill_register #( - .T (aw_chan_t), - .Bypass(BypassAw) - ) i_reg_aw ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .valid_i(slv_req_i.aw_valid), - .ready_o(slv_resp_o.aw_ready), - .data_i (slv_req_i.aw), - .valid_o(mst_req_o.aw_valid), - .ready_i(mst_resp_i.aw_ready), - .data_o (mst_req_o.aw) - ); - - spill_register #( - .T (w_chan_t), - .Bypass(BypassW) - ) i_reg_w ( - .clk_i (clk_i), - .rst_ni (rst_ni), - 
.valid_i(slv_req_i.w_valid), - .ready_o(slv_resp_o.w_ready), - .data_i (slv_req_i.w), - .valid_o(mst_req_o.w_valid), - .ready_i(mst_resp_i.w_ready), - .data_o (mst_req_o.w) - ); - - spill_register #( - .T (b_chan_t), - .Bypass(BypassB) - ) i_reg_b ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .valid_i(mst_resp_i.b_valid), - .ready_o(mst_req_o.b_ready), - .data_i (mst_resp_i.b), - .valid_o(slv_resp_o.b_valid), - .ready_i(slv_req_i.b_ready), - .data_o (slv_resp_o.b) - ); - - spill_register #( - .T (ar_chan_t), - .Bypass(BypassAr) - ) i_reg_ar ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .valid_i(slv_req_i.ar_valid), - .ready_o(slv_resp_o.ar_ready), - .data_i (slv_req_i.ar), - .valid_o(mst_req_o.ar_valid), - .ready_i(mst_resp_i.ar_ready), - .data_o (mst_req_o.ar) - ); - - spill_register #( - .T (r_chan_t), - .Bypass(BypassR) - ) i_reg_r ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .valid_i(mst_resp_i.r_valid), - .ready_o(mst_req_o.r_ready), - .data_i (mst_resp_i.r), - .valid_o(slv_resp_o.r_valid), - .ready_i(slv_req_i.r_ready), - .data_o (slv_resp_o.r) - ); - - if (BypassAck) begin : gen_xack_bypass - assign mst_req_o.wack = slv_req_i.wack; - assign mst_req_o.rack = slv_req_i.rack; - end else begin : gen_xack_reg - - logic wack_q; - logic rack_q; - - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - wack_q <= 1'b0; - rack_q <= 1'b0; - end else begin - wack_q <= slv_req_i.wack; - rack_q <= slv_req_i.rack; - end - end - - assign mst_req_o.wack = wack_q; - assign mst_req_o.rack = rack_q; - end -endmodule - -`include "ace/assign.svh" -`include "ace/typedef.svh" - -// interface wrapper -module ace_cut_intf #( - // Bypass eneable - parameter bit BYPASS = 1'b0, - parameter bit BYPASS_AW = BYPASS, - parameter bit BYPASS_W = BYPASS, - parameter bit BYPASS_B = BYPASS, - parameter bit BYPASS_AR = BYPASS, - parameter bit BYPASS_R = BYPASS, - parameter bit BYPASS_ACK = BYPASS, - // The address width. - parameter int unsigned ADDR_WIDTH = 0, - // The data width. 
- parameter int unsigned DATA_WIDTH = 0, - // The ID width. - parameter int unsigned ID_WIDTH = 0, - // The user data width. - parameter int unsigned USER_WIDTH = 0 -) ( - input logic clk_i, - input logic rst_ni, - ACE_BUS.Slave in, - ACE_BUS.Master out -); - - typedef logic [ID_WIDTH-1:0] id_t; - typedef logic [ADDR_WIDTH-1:0] addr_t; - typedef logic [DATA_WIDTH-1:0] data_t; - typedef logic [DATA_WIDTH/8-1:0] strb_t; - typedef logic [USER_WIDTH-1:0] user_t; - - `ACE_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) - `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) - `ACE_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) - `ACE_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) - `ACE_TYPEDEF_REQ_T(ace_req_t, aw_chan_t, w_chan_t, ar_chan_t) - `ACE_TYPEDEF_RESP_T(ace_resp_t, b_chan_t, r_chan_t) - - ace_req_t slv_req, mst_req; - ace_resp_t slv_resp, mst_resp; - - `ACE_ASSIGN_TO_REQ(slv_req, in) - `ACE_ASSIGN_FROM_RESP(in, slv_resp) - - `ACE_ASSIGN_FROM_REQ(out, mst_req) - `ACE_ASSIGN_TO_RESP(mst_resp, out) - - ace_cut #( - .Bypass (BYPASS), - .BypassAw (BYPASS_AW), - .BypassW (BYPASS_W), - .BypassB (BYPASS_B), - .BypassAr (BYPASS_AR), - .BypassR (BYPASS_R), - .BypassAck (BYPASS_ACK), - .aw_chan_t (aw_chan_t), - .w_chan_t (w_chan_t), - .b_chan_t (b_chan_t), - .ar_chan_t (ar_chan_t), - .r_chan_t (r_chan_t), - .ace_req_t (ace_req_t), - .ace_resp_t(ace_resp_t) - ) i_ace_cut ( - .clk_i, - .rst_ni, - .slv_req_i (slv_req), - .slv_resp_o(slv_resp), - .mst_req_o (mst_req), - .mst_resp_i(mst_resp) - ); - - // Check the invariants. 
- // pragma translate_off -`ifndef VERILATOR - initial begin - assert (ADDR_WIDTH > 0) - else $fatal(1, "Wrong addr width parameter"); - assert (DATA_WIDTH > 0) - else $fatal(1, "Wrong data width parameter"); - assert (ID_WIDTH > 0) - else $fatal(1, "Wrong id width parameter"); - assert (USER_WIDTH > 0) - else $fatal(1, "Wrong user width parameter"); - assert (in.AXI_ADDR_WIDTH == ADDR_WIDTH) - else $fatal(1, "Wrong interface definition"); - assert (in.AXI_DATA_WIDTH == DATA_WIDTH) - else $fatal(1, "Wrong interface definition"); - assert (in.AXI_ID_WIDTH == ID_WIDTH) - else $fatal(1, "Wrong interface definition"); - assert (in.AXI_USER_WIDTH == USER_WIDTH) - else $fatal(1, "Wrong interface definition"); - assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) - else $fatal(1, "Wrong interface definition"); - assert (out.AXI_DATA_WIDTH == DATA_WIDTH) - else $fatal(1, "Wrong interface definition"); - assert (out.AXI_ID_WIDTH == ID_WIDTH) - else $fatal(1, "Wrong interface definition"); - assert (out.AXI_USER_WIDTH == USER_WIDTH) - else $fatal(1, "Wrong interface definition"); - end -`endif - // pragma translate_on -endmodule diff --git a/src/ace_demux_simple.sv b/src/ace_demux_simple.sv deleted file mode 100644 index b429729..0000000 --- a/src/ace_demux_simple.sv +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright (c) 2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
-// -// Authors: -// - Riccardo Tedeschi - -`include "ace/typedef.svh" -`include "ace/assign.svh" - -module ace_demux_simple #( - parameter int unsigned AxiIdWidth = 32'd0, - parameter bit AtopSupport = 1'b1, - parameter type aw_chan_t = logic, - parameter type w_chan_t = logic, - parameter type ar_chan_t = logic, - parameter type req_t = logic, - parameter type resp_t = logic, - parameter int unsigned NoMstPorts = 32'd0, - parameter int unsigned MaxTrans = 32'd8, - parameter int unsigned AxiLookBits = 32'd3, - parameter bit UniqueIds = 1'b0, - localparam int unsigned SelectWidth = (NoMstPorts > 32'd1) ? $clog2(NoMstPorts) : 32'd1, - localparam type select_t = logic [SelectWidth-1:0] -) ( - input logic clk_i, - input logic rst_ni, - input logic test_i, - // Slave Port - input req_t slv_req_i, - input select_t slv_aw_select_i, - input select_t slv_ar_select_i, - output resp_t slv_resp_o, - // Master Ports - output req_t [NoMstPorts-1:0] mst_reqs_o, - input resp_t [NoMstPorts-1:0] mst_resps_i -); - - // All subsequent ACE defines will not use RACK/WACKS - `define __ACE_NO_ACKS - - // ACE request structure without RACK and WACK - `ACE_TYPEDEF_REQ_T(__req_t, aw_chan_t, w_chan_t, ar_chan_t) - - __req_t slv_req; - __req_t [NoMstPorts-1:0] mst_reqs; - resp_t [NoMstPorts-1:0] mst_resps; - - select_t mst_b_idx; - select_t mst_r_idx; - select_t wack_idx; - select_t rack_idx; - - logic [NoMstPorts-1:0] mst_wacks; - logic [NoMstPorts-1:0] mst_racks; - - `ACE_ASSIGN_REQ_STRUCT(slv_req, slv_req_i) - - // AXI demux simple instance - // {{{ - axi_demux_simple #( - .AxiIdWidth (AxiIdWidth), - .AtopSupport(AtopSupport), - .axi_req_t (__req_t), - .axi_resp_t (resp_t), - .NoMstPorts (NoMstPorts), - .MaxTrans (MaxTrans), - .AxiLookBits(AxiLookBits), - .UniqueIds (UniqueIds) - ) u_axi_demux ( - .clk_i, - .rst_ni, - .test_i (test_i), - .slv_req_i (slv_req), - .slv_resp_o (slv_resp_o), - .slv_aw_select_i(slv_aw_select_i), - .slv_ar_select_i(slv_ar_select_i), - .mst_reqs_o (mst_reqs), - 
.mst_resps_i (mst_resps), - .mst_b_idx_o (mst_b_idx), - .mst_r_idx_o (mst_r_idx) - ); - // }}} - - // xACKs generation - // {{{ - fifo_v3 #( - .FALL_THROUGH(1'b0), - .DEPTH (MaxTrans), - .dtype (select_t) - ) i_switch_w_fifo ( - .clk_i, - .rst_ni, - .flush_i (1'b0), - .testmode_i(1'b0), - .full_o (mst_b_stall), - .empty_o (), - .usage_o (), - .data_i (mst_b_idx), - .push_i (slv_resp_o.b_valid && slv_req_i.b_ready), - .data_o (wack_idx), - .pop_i (slv_req_i.wack) - ); - - fifo_v3 #( - .FALL_THROUGH(1'b0), - .DEPTH (MaxTrans), - .dtype (select_t) - ) i_switch_r_fifo ( - .clk_i, - .rst_ni, - .flush_i (1'b0), - .testmode_i(1'b0), - .full_o (mst_r_stall), - .empty_o (), - .usage_o (), - .data_i (mst_r_idx), - .push_i (slv_resp_o.r_valid && slv_req_i.r_ready && slv_resp_o.r.last), - .data_o (rack_idx), - .pop_i (slv_req_i.rack) - ); - - always_comb begin - mst_wacks = '0; - mst_racks = '0; - - if (slv_req_i.rack) mst_racks[rack_idx] = 1'b1; - if (slv_req_i.wack) mst_wacks[wack_idx] = 1'b1; - end - - always_comb begin - for (int i = 0; i < NoMstPorts; i++) begin - `ACE_SET_REQ_STRUCT(mst_reqs_o[i], mst_reqs[i]) - `ACE_SET_RESP_STRUCT(mst_resps[i], mst_resps_i[i]) - mst_reqs_o[i].rack = mst_racks[i]; - mst_reqs_o[i].wack = mst_wacks[i]; - - if (mst_r_stall) begin - mst_resps[i].r_valid = 1'b0; - mst_reqs_o[i].r_ready = 1'b0; - end - - if (mst_b_stall) begin - mst_resps[i].b_valid = 1'b0; - mst_reqs_o[i].b_ready = 1'b0; - end - end - end - // }}} - - `undef __ACE_NO_ACKS - -endmodule diff --git a/src/ace_mux.sv b/src/ace_mux.sv deleted file mode 100644 index edc11ba..0000000 --- a/src/ace_mux.sv +++ /dev/null @@ -1,424 +0,0 @@ -// Copyright (c) 2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// -// Authors: -// - Riccardo Tedeschi - -`include "ace/typedef.svh" -`include "ace/assign.svh" - -module ace_mux #( - // ACE parameter and channel types - parameter int unsigned SlvAxiIDWidth = 32'd0, // AXI ID width, slave ports - parameter type slv_aw_chan_t = logic, // AW Channel Type, slave ports - parameter type mst_aw_chan_t = logic, // AW Channel Type, master port - parameter type w_chan_t = logic, // W Channel Type, all ports - parameter type slv_b_chan_t = logic, // B Channel Type, slave ports - parameter type mst_b_chan_t = logic, // B Channel Type, master port - parameter type slv_ar_chan_t = logic, // AR Channel Type, slave ports - parameter type mst_ar_chan_t = logic, // AR Channel Type, master port - parameter type slv_r_chan_t = logic, // R Channel Type, slave ports - parameter type mst_r_chan_t = logic, // R Channel Type, master port - parameter type slv_req_t = logic, // Slave port request type - parameter type slv_resp_t = logic, // Slave port response type - parameter type mst_req_t = logic, // Master ports request type - parameter type mst_resp_t = logic, // Master ports response type - parameter int unsigned NoSlvPorts = 32'd0, // Number of slave ports - // Maximum number of outstanding transactions per write - parameter int unsigned MaxWTrans = 32'd8, - // Maximum number of outstanding transactions per B channel (ACE) - parameter int unsigned MaxBTrans = 32'd8, - // Maximum number of outstanding transactions per R channel (ACE) - parameter int unsigned MaxRTrans = 32'd8, - // If enabled, this multiplexer is purely combinatorial - parameter bit FallThrough = 1'b0, - // add spill register on write 
master ports, adds a cycle latency on write channels - parameter bit SpillAw = 1'b1, - parameter bit SpillW = 1'b0, - parameter bit SpillB = 1'b0, - // add spill register on read master ports, adds a cycle latency on read channels - parameter bit SpillAr = 1'b1, - parameter bit SpillR = 1'b0, - // add registers on xACK ports, add a cycle latency on acknowledgment signals (ACE) - parameter bit RegAck = 1'b0 -) ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low - input logic test_i, // Test Mode enable - // slave ports (ACE inputs), connect master modules here - input slv_req_t [NoSlvPorts-1:0] slv_reqs_i, - output slv_resp_t [NoSlvPorts-1:0] slv_resps_o, - // master port (ACE outputs), connect slave modules here - output mst_req_t mst_req_o, - input mst_resp_t mst_resp_i -); - // All subsequent ACE defines will not use RACK/WACKS - `define __ACE_NO_ACKS - - // Internal request type without acks - `ACE_TYPEDEF_REQ_T(__slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t) - `ACE_TYPEDEF_REQ_T(__mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t) - - __slv_req_t [NoSlvPorts-1:0] slv_reqs; - - __mst_req_t mst_req; - mst_resp_t mst_resp; - - // Input req setup - // {{{ - for (genvar i = 0; i < NoSlvPorts; i++) begin - `ACE_ASSIGN_REQ_STRUCT(slv_reqs[i], slv_reqs_i[i]) - end - // }}} - - // AXI MUX instance - // {{{ - axi_mux #( - .SlvAxiIDWidth(SlvAxiIDWidth), - .slv_aw_chan_t(slv_aw_chan_t), - .mst_aw_chan_t(mst_aw_chan_t), - .w_chan_t (w_chan_t), - .slv_b_chan_t (slv_b_chan_t), - .mst_b_chan_t (mst_b_chan_t), - .slv_ar_chan_t(slv_ar_chan_t), - .mst_ar_chan_t(mst_ar_chan_t), - .slv_r_chan_t (slv_r_chan_t), - .mst_r_chan_t (mst_r_chan_t), - .slv_req_t (__slv_req_t), - .slv_resp_t (slv_resp_t), - .mst_req_t (__mst_req_t), - .mst_resp_t (mst_resp_t), - .NoSlvPorts (NoSlvPorts), - .MaxWTrans (MaxWTrans), - .FallThrough (FallThrough), - .SpillAw (SpillAw), - .SpillW (SpillW), - .SpillB (SpillB), - .SpillAr (SpillAr), - .SpillR (SpillR) - ) 
u_axi_mux ( - .clk_i, - .rst_ni, - .test_i (test_i), - .slv_reqs_i (slv_reqs), - .slv_resps_o(slv_resps_o), - .mst_req_o (mst_req), - .mst_resp_i (mst_resp) - ); - - // }}} - - // Output req/resp setup - // {{{ - logic [NoSlvPorts-1:0] slv_racks; - logic [NoSlvPorts-1:0] slv_wacks; - logic mst_rack; - logic mst_wack; - logic mst_b_stall; - logic mst_r_stall; - - for (genvar i = 0; i < NoSlvPorts; i++) begin - assign slv_racks[i] = slv_reqs_i[i].rack; - assign slv_wacks[i] = slv_reqs_i[i].wack; - end - - always_comb begin - // Use AXI defines since we are working with - // the AXI backward compatible req structure whithout xACKs - `ACE_SET_REQ_STRUCT(mst_req_o, mst_req) - `ACE_SET_RESP_STRUCT(mst_resp, mst_resp_i) - // Get the xACKs from the dedicated logic instead - mst_req_o.wack = mst_wack; - mst_req_o.rack = mst_rack; - - // Stall B if the WACK ROB is full - if (mst_b_stall) begin - mst_resp.b_valid = 1'b0; - mst_req_o.b_ready = 1'b0; - end - - // Stall R if the WACK ROB is full - if (mst_r_stall) begin - mst_resp.r_valid = 1'b0; - mst_req_o.r_ready = 1'b0; - end - end - // }}} - - // xACKs generation - // {{{ - if (NoSlvPorts > 1) begin : gen_xack_rob - localparam int unsigned MstIdxBits = $clog2(NoSlvPorts); - typedef logic [MstIdxBits-1:0] switch_id_t; - - switch_id_t switch_b_id; - switch_id_t switch_r_id; - - logic r_last_handshake; - logic b_handshake; - - assign switch_r_id = mst_resp.r.id[SlvAxiIDWidth+:MstIdxBits]; - assign switch_b_id = mst_resp.b.id[SlvAxiIDWidth+:MstIdxBits]; - - assign r_last_handshake = mst_req_o.r_ready && mst_resp_i.r_valid && mst_resp_i.r.last; - assign b_handshake = mst_req_o.b_ready && mst_resp_i.b_valid; - - ace_mux_xack #( - .N (NoSlvPorts), - .MAX_OUTSTANDING(MaxBTrans), - .FALL_THROUGH (!RegAck) - ) u_wack_gen ( - .clk_i, - .rst_ni, - .empty_o ( /* unused */), - .full_o (mst_b_stall), - .handshake_i(b_handshake), - .idx_i (switch_b_id), - .acks_i (slv_wacks), - .ack_o (mst_wack) - ); - - ace_mux_xack #( - .N (NoSlvPorts), 
- .MAX_OUTSTANDING(MaxRTrans), - .FALL_THROUGH (!RegAck) - ) u_rack_gen ( - .clk_i, - .rst_ni, - .empty_o ( /* unused */), - .full_o (mst_r_stall), - .handshake_i(r_last_handshake), - .idx_i (switch_r_id), - .acks_i (slv_racks), - .ack_o (mst_rack) - ); - end else if (!RegAck) begin : gen_xack_assign - assign mst_wack = slv_wacks[0]; - assign mst_rack = slv_racks[0]; - assign mst_b_stall = 1'b0; - assign mst_r_stall = 1'b0; - end else begin : gen_xack_ffs - always_ff @(posedge clk_i or negedge rst_ni) begin - mst_wack <= slv_wacks[0]; - mst_rack <= slv_racks[0]; - end - assign mst_b_stall = 1'b0; - assign mst_r_stall = 1'b0; - end - // }}} - - `undef __ACE_NO_ACKS - -endmodule - -// interface wrap -module ace_mux_intf #( - parameter int unsigned SLV_AXI_ID_WIDTH = 32'd0, // Synopsys DC requires default value for params - parameter int unsigned MST_AXI_ID_WIDTH = 32'd0, - parameter int unsigned AXI_ADDR_WIDTH = 32'd0, - parameter int unsigned AXI_DATA_WIDTH = 32'd0, - parameter int unsigned AXI_USER_WIDTH = 32'd0, - parameter int unsigned NO_SLV_PORTS = 32'd0, // Number of slave ports - // Maximum number of outstanding transactions per write - parameter int unsigned MAX_W_TRANS = 32'd8, - // Maximum number of outstanding transactions per B channel (ACE) - parameter int unsigned MAX_B_TRANS = 32'd8, - // Maximum number of outstanding transactions per R channel (ACE) - parameter int unsigned MAX_R_TRANS = 32'd8, - // if enabled, this multiplexer is purely combinatorial - parameter bit FALL_THROUGH = 1'b0, - // add spill register on write master ports, adds a cycle latency on write channels - parameter bit SPILL_AW = 1'b1, - parameter bit SPILL_W = 1'b0, - parameter bit SPILL_B = 1'b0, - // add spill register on read master ports, adds a cycle latency on read channels - parameter bit SPILL_AR = 1'b1, - parameter bit SPILL_R = 1'b0, - // add registers on xACK ports, add a cycle latency on acknowledgment signals (ACE) - parameter bit REG_ACK = 1'b0 -) ( - input logic 
clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low - input logic test_i, // Testmode enable - ACE_BUS.Slave slv [NO_SLV_PORTS-1:0], // slave ports - ACE_BUS.Master mst // master port -); - - typedef logic [SLV_AXI_ID_WIDTH-1:0] slv_id_t; - typedef logic [MST_AXI_ID_WIDTH-1:0] mst_id_t; - typedef logic [AXI_ADDR_WIDTH -1:0] addr_t; - typedef logic [AXI_DATA_WIDTH-1:0] data_t; - typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; - typedef logic [AXI_USER_WIDTH-1:0] user_t; - // channels typedef - `ACE_TYPEDEF_AW_CHAN_T(slv_aw_chan_t, addr_t, slv_id_t, user_t) - `ACE_TYPEDEF_AW_CHAN_T(mst_aw_chan_t, addr_t, mst_id_t, user_t) - - `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) - - `AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, slv_id_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, mst_id_t, user_t) - - `ACE_TYPEDEF_AR_CHAN_T(slv_ar_chan_t, addr_t, slv_id_t, user_t) - `ACE_TYPEDEF_AR_CHAN_T(mst_ar_chan_t, addr_t, mst_id_t, user_t) - - `ACE_TYPEDEF_R_CHAN_T(slv_r_chan_t, data_t, slv_id_t, user_t) - `ACE_TYPEDEF_R_CHAN_T(mst_r_chan_t, data_t, mst_id_t, user_t) - - `ACE_TYPEDEF_REQ_T(slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t) - `ACE_TYPEDEF_RESP_T(slv_resp_t, slv_b_chan_t, slv_r_chan_t) - - `ACE_TYPEDEF_REQ_T(mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t) - `ACE_TYPEDEF_RESP_T(mst_resp_t, mst_b_chan_t, mst_r_chan_t) - - slv_req_t [NO_SLV_PORTS-1:0] slv_reqs; - slv_resp_t [NO_SLV_PORTS-1:0] slv_resps; - mst_req_t mst_req; - mst_resp_t mst_resp; - - for (genvar i = 0; i < NO_SLV_PORTS; i++) begin : gen_assign_slv_ports - `ACE_ASSIGN_TO_REQ(slv_reqs[i], slv[i]) - `ACE_ASSIGN_FROM_RESP(slv[i], slv_resps[i]) - end - - `ACE_ASSIGN_FROM_REQ(mst, mst_req) - `ACE_ASSIGN_TO_RESP(mst_resp, mst) - - ace_mux #( - .SlvAxiIDWidth(SLV_AXI_ID_WIDTH), - .slv_aw_chan_t(slv_aw_chan_t), // AW Channel Type, slave ports - .mst_aw_chan_t(mst_aw_chan_t), // AW Channel Type, master port - .w_chan_t (w_chan_t), // W Channel Type, all ports - .slv_b_chan_t (slv_b_chan_t), // B 
Channel Type, slave ports - .mst_b_chan_t (mst_b_chan_t), // B Channel Type, master port - .slv_ar_chan_t(slv_ar_chan_t), // AR Channel Type, slave ports - .mst_ar_chan_t(mst_ar_chan_t), // AR Channel Type, master port - .slv_r_chan_t (slv_r_chan_t), // R Channel Type, slave ports - .mst_r_chan_t (mst_r_chan_t), // R Channel Type, master port - .slv_req_t (slv_req_t), - .slv_resp_t (slv_resp_t), - .mst_req_t (mst_req_t), - .mst_resp_t (mst_resp_t), - .NoSlvPorts (NO_SLV_PORTS), // Number of slave ports - .MaxWTrans (MAX_W_TRANS), - .MaxBTrans (MAX_B_TRANS), - .MaxRTrans (MAX_R_TRANS), - .FallThrough (FALL_THROUGH), - .SpillAw (SPILL_AW), - .SpillW (SPILL_W), - .SpillB (SPILL_B), - .SpillAr (SPILL_AR), - .SpillR (SPILL_R), - .RegAck (REG_ACK) - ) i_ace_mux ( - .clk_i (clk_i), // Clock - .rst_ni (rst_ni), // Asynchronous reset active low - .test_i (test_i), // Test Mode enable - .slv_reqs_i (slv_reqs), - .slv_resps_o(slv_resps), - .mst_req_o (mst_req), - .mst_resp_i (mst_resp) - ); -endmodule - -module ace_mux_xack -// Parameters -// {{{ -#( - parameter int unsigned N = 0, - parameter int unsigned MAX_OUTSTANDING = 4, - parameter bit FALL_THROUGH = 0, - localparam int unsigned IDX_WIDTH = N > 1 ? 
$clog2(N) : 1, - localparam type idx_t = logic [IDX_WIDTH-1:0] -) -// }}} - -// Ports -// {{{ -( - input logic clk_i, - input logic rst_ni, - - output logic empty_o, - output logic full_o, - - input logic handshake_i, - input idx_t idx_i, - - input logic [N-1:0] acks_i, - - output logic ack_o -); - // }}} - - // Internal signals - // {{{ - idx_t sel; - logic [N-1:0] sel_bv; - logic [N-1:0] ack_gnts; - logic [N-1:0] ack_reqs; - // }}} - - // Track response ordering with a FIFO - // {{{ - fifo_v3 #( - .FALL_THROUGH(1'b0), - .DEPTH (MAX_OUTSTANDING), - .dtype (idx_t) - ) i_sel_fifo ( - .clk_i, - .rst_ni, - .flush_i (1'b0), - .testmode_i(1'b0), - .full_o (full_o), - .empty_o (empty_o), - .usage_o (), - .data_i (idx_i), - .push_i (handshake_i), - .data_o (sel), - .pop_i (ack_o) - ); - // }}} - - // Per-slv port credit counter - // An out-of-order xACK signal increases - // the counter by 1 - // When the FIFO selects the counter, - // it is decreased by 1 - // {{{ - for (genvar i = 0; i < N; i++) begin : gen_credit_counters - credit_counter #( - .NumCredits (MAX_OUTSTANDING), - .InitCreditEmpty(1'b1) - ) u_credit_counter ( - .clk_i, - .rst_ni, - .credit_o ( /* unused */), - .credit_give_i(acks_i[i]), - .credit_take_i(ack_gnts[i]), - .credit_init_i('0), - .credit_left_o(ack_reqs[i]), - .credit_crit_o( /* unused */), - .credit_full_o( /* unused */) - ); - end - // }}} - - // xACK generation - // {{{ - assign sel_bv = N'(1) << sel; - // In FALL_THROUGH mode, a concurrent increase and decrease of the counter - // does not alter its value and the xACK signal is combinationally generated - assign ack_gnts = sel_bv & (ack_reqs | (FALL_THROUGH ? 
acks_i : '0)); - assign ack_o = |ack_gnts; - // }}} - -endmodule diff --git a/src/ace_pkg.sv b/src/ace_pkg.sv index 8996758..6f62a9d 100644 --- a/src/ace_pkg.sv +++ b/src/ace_pkg.sv @@ -267,7 +267,7 @@ package ace_pkg; // Transaction groups - function automatic logic ace_aw_is_coherent(logic awbar0, axdomain_t awdomain, + function automatic logic ace_aw_is_shareable(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); logic retval; unique case (1'b1) @@ -302,7 +302,7 @@ package ace_pkg; return retval; endfunction - function automatic logic ace_ar_is_coherent(logic arbar0, axdomain_t ardomain, + function automatic logic ace_ar_is_shareable(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); logic retval; unique case (1'b1) @@ -318,13 +318,12 @@ package ace_pkg; return retval; endfunction - function automatic logic ace_ar_is_cache_maintenance(logic arbar0, axdomain_t ardomain, - arsnoop_t arsnoop); + function automatic logic ace_ar_is_clean(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); logic retval; unique case (1'b1) + ace_is_clean_unique(arbar0, ardomain, arsnoop): retval = 1'b1; ace_is_clean_shared(arbar0, ardomain, arsnoop): retval = 1'b1; ace_is_clean_invalid(arbar0, ardomain, arsnoop): retval = 1'b1; - ace_is_make_invalid(arbar0, ardomain, arsnoop): retval = 1'b1; default: retval = 1'b0; endcase return retval; @@ -332,16 +331,13 @@ package ace_pkg; // Snoop transaction from initiating master transaction function automatic acsnoop_t ace_ar_acsnoop_map(logic arbar0, axdomain_t ardomain, - arsnoop_t arsnoop, logic arlock); + arsnoop_t arsnoop); acsnoop_t acsnoop; unique case (1'b1) ace_is_clean_unique(arbar0, ardomain, arsnoop): acsnoop = acsnoop_t'(CleanInvalid); ace_is_make_unique(arbar0, ardomain, arsnoop): acsnoop = acsnoop_t'(MakeInvalid); default: acsnoop = acsnoop_t'(arsnoop); endcase - // Hacky way to support AMOs in Culsans with the legacy WB cache - if (arlock && ace_is_read_once(arbar0, ardomain, arsnoop)) - acsnoop = 
acsnoop_t'(CleanInvalid); return acsnoop; endfunction diff --git a/src/ace_snoop_cut.sv b/src/ace_snoop_cut.sv deleted file mode 100644 index 62c9827..0000000 --- a/src/ace_snoop_cut.sv +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright (c) 2014-2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// -// Authors: -// - Riccardo Tedeschi - -/// An ACE4 snoop cut. -/// -/// Breaks all combinatorial paths between its input and output. 
-module ace_snoop_cut #( - // bypass enable - parameter bit Bypass = 1'b0, - parameter bit BypassAc = Bypass, - parameter bit BypassCr = Bypass, - parameter bit BypassCd = Bypass, - // ACE snoop channel structs - parameter type ac_chan_t = logic, - parameter type cd_chan_t = logic, - parameter type cr_chan_t = logic, - // ACE snoop request & response structs - parameter type snoop_req_t = logic, - parameter type snoop_resp_t = logic -) ( - input logic clk_i, - input logic rst_ni, - // salve port - input snoop_req_t slv_req_i, - output snoop_resp_t slv_resp_o, - // master port - output snoop_req_t mst_req_o, - input snoop_resp_t mst_resp_i -); - - // Snoop channels cut - spill_register #( - .T (ac_chan_t), - .Bypass(BypassAc) - ) i_reg_ac ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .valid_i(slv_req_i.ac_valid), - .ready_o(slv_resp_o.ac_ready), - .data_i (slv_req_i.ac), - .valid_o(mst_req_o.ac_valid), - .ready_i(mst_resp_i.ac_ready), - .data_o (mst_req_o.ac) - ); - - spill_register #( - .T (cd_chan_t), - .Bypass(BypassCd) - ) i_reg_cd ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .valid_i(mst_resp_i.cd_valid), - .ready_o(mst_req_o.cd_ready), - .data_i (mst_resp_i.cd), - .valid_o(slv_resp_o.cd_valid), - .ready_i(slv_req_i.cd_ready), - .data_o (slv_resp_o.cd) - ); - - spill_register #( - .T (cr_chan_t), - .Bypass(BypassCr) - ) i_reg_cr ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .valid_i(mst_resp_i.cr_valid), - .ready_o(mst_req_o.cr_ready), - .data_i (mst_resp_i.cr_resp), - .valid_o(slv_resp_o.cr_valid), - .ready_i(slv_req_i.cr_ready), - .data_o (slv_resp_o.cr_resp) - ); - -endmodule - -`include "ace/assign.svh" -`include "ace/typedef.svh" - -// interface wrapper -module ace_snoop_cut_intf #( - // Bypass eneable - parameter bit BYPASS = 1'b0, - parameter bit BYPASS_AC = BYPASS, - parameter bit BYPASS_CR = BYPASS, - parameter bit BYPASS_CD = BYPASS, - // The address width. - parameter int unsigned ADDR_WIDTH = 0, - // The data width. 
- parameter int unsigned DATA_WIDTH = 0 -) ( - input logic clk_i, - input logic rst_ni, - SNOOP_BUS.Slave in, - SNOOP_BUS.Master out -); - - typedef logic [ADDR_WIDTH-1:0] addr_t; - typedef logic [DATA_WIDTH-1:0] data_t; - - `SNOOP_TYPEDEF_ALL(snoop, addr_t, data_t) - - snoop_req_t slv_req, mst_req; - snoop_resp_t slv_resp, mst_resp; - - `SNOOP_ASSIGN_TO_REQ(slv_req, in) - `SNOOP_ASSIGN_FROM_RESP(in, slv_resp) - - `SNOOP_ASSIGN_FROM_REQ(out, mst_req) - `SNOOP_ASSIGN_TO_RESP(mst_resp, out) - - ace_snoop_cut #( - .Bypass (BYPASS), - .BypassAc (BYPASS_AC), - .BypassCr (BYPASS_CR), - .BypassCd (BYPASS_CD), - .ac_chan_t (snoop_ac_chan_t), - .cd_chan_t (snoop_cd_chan_t), - .cr_chan_t (snoop_cr_chan_t), - .snoop_req_t (snoop_req_t), - .snoop_resp_t(snoop_resp_t) - ) i_ace_snoop_cut ( - .clk_i, - .rst_ni, - .slv_req_i (slv_req), - .slv_resp_o(slv_resp), - .mst_req_o (mst_req), - .mst_resp_i(mst_resp) - ); - - // Check the invariants. - // pragma translate_off -`ifndef VERILATOR - initial begin - assert (ADDR_WIDTH > 0) - else $fatal(1, "Wrong addr width parameter"); - assert (DATA_WIDTH > 0) - else $fatal(1, "Wrong data width parameter"); - assert (in.SNOOP_ADDR_WIDTH == ADDR_WIDTH) - else $fatal(1, "Wrong interface definition"); - assert (in.SNOOP_DATA_WIDTH == DATA_WIDTH) - else $fatal(1, "Wrong interface definition"); - assert (out.SNOOP_ADDR_WIDTH == ADDR_WIDTH) - else $fatal(1, "Wrong interface definition"); - assert (out.SNOOP_DATA_WIDTH == DATA_WIDTH) - else $fatal(1, "Wrong interface definition"); - end -`endif - // pragma translate_on -endmodule diff --git a/src/ccu/ace_ccu_ax_arbiter.sv b/src/ccu/ace_ccu_ax_arbiter.sv deleted file mode 100644 index 376155d..0000000 --- a/src/ccu/ace_ccu_ax_arbiter.sv +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright (c) 2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with 
the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -`include "axi/assign.svh" - -module ace_ccu_ax_arbiter - import ace_pkg::*; - import ace_ccu_pkg::*; -#( - parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type midend_aw_t = logic, - parameter type midend_ar_t = logic, - parameter type midend_ax_t = logic -) ( - input logic clk_i, - input logic rst_ni, - - input logic replay_full_i, - - input midend_aw_t aw_i, - input logic aw_valid_i, - output logic aw_ready_o, - input midend_ar_t ar_i, - input logic ar_valid_i, - output logic ar_ready_o, - input midend_ar_t replay_ar_i, - input logic replay_ar_valid_i, - output logic replay_ar_ready_o, - - output midend_ax_t ax_o, - output logic ax_valid_o, - input logic ax_ready_i, - output logic ax_is_write_o, - output logic ax_is_replay_o, - output acsnoop_t ax_acsnoop_o, - output logic ar_accepts_dirty_o, - output logic ar_accepts_dirty_shared_o, - output logic ar_accepts_shared_o, - output axdomain_t ax_domain_o -); - - // Internal signals - // {{{ - midend_ar_t ar_muxed; - midend_ax_t aw_in; - midend_ax_t ar_in; - midend_ax_t ax; - midend_ax_t replay_ar; - logic ax_valid; - logic ax_ready; - logic ax_arb_valid; - logic ax_arb_ready; - logic ax_is_write; - acsnoop_t aw_acsnoop; - acsnoop_t ar_acsnoop; - logic ar_accepts_dirty; - logic ar_accepts_dirty_shared; - logic ar_accepts_shared; - // }}} - - // Coherence decoding - // {{{ - assign ar_muxed = ax_is_replay_o ? 
replay_ar_i : ar_i; - // ACSNOOP computed from AWSNOOP - assign aw_acsnoop = ace_aw_acsnoop_map(aw_i.bar[0], aw_i.domain, aw_i.snoop); - // ACSNOOP computed from ARSNOOP - assign ar_acsnoop = ace_ar_acsnoop_map( - ar_muxed.bar[0], ar_muxed.domain, ar_muxed.snoop, ar_muxed.lock - ); - // Read transaction can accept a cacheline in Dirty state - assign ar_accepts_dirty = ace_ar_accepts_dirty( - ar_muxed.bar[0], ar_muxed.domain, ar_muxed.snoop - ); - // Read transaction can accept a cacheline in Dirty and Shared state - assign ar_accepts_dirty_shared = ace_ar_accepts_dirty_shared( - ar_muxed.bar[0], ar_muxed.domain, ar_muxed.snoop - ); - // Read transaction can accept a cacheline in Shared state - assign ar_accepts_shared = ace_ar_accepts_shared( - ar_muxed.bar[0], ar_muxed.domain, ar_muxed.snoop - ); - // Mux output signals - always_comb begin : output_mux - // AR request (input or replay) - ax_is_write_o = 1'b0; - ax_acsnoop_o = ar_acsnoop; - ar_accepts_dirty_o = ar_accepts_dirty; - ar_accepts_dirty_shared_o = ar_accepts_dirty_shared; - ar_accepts_shared_o = ar_accepts_shared; - ax_domain_o = ar_muxed.domain; - - if (!ax_is_replay_o && ax_is_write) begin - // AW request (input) - ax_is_write_o = 1'b1; - ax_acsnoop_o = aw_acsnoop; - ax_domain_o = aw_i.domain; - end - end - // }}} - - // Input AX arbiter - // {{{ - // Assign AW to internal AX data type - always_comb begin - aw_in = '0; - `AXI_SET_AW_STRUCT(aw_in, aw_i) - end - - // Assign AR to internal AX data type - always_comb begin - ar_in = '0; - `AXI_SET_AR_STRUCT(ar_in, ar_i) - end - - rr_arb_tree #( - .NumIn (2), - .DataType (midend_ax_t), - .AxiVldRdy(1'b1), - .LockIn (1'b1) - ) u_ax_arbiter ( - .clk_i, - .rst_ni, - .flush_i(1'b0), - .rr_i (1'b0), - .req_i ({aw_valid_i, ar_valid_i}), - .gnt_o ({aw_ready_o, ar_ready_o}), - .data_i ({aw_in, ar_in}), - .req_o (ax_arb_valid), - .gnt_i (ax_arb_ready), - .data_o (ax), - .idx_o (ax_is_write) - ); - - assign ax_valid = !replay_full_i && ax_arb_valid; - assign 
ax_arb_ready = !replay_full_i && ax_ready; - // }}} - - // Replay arbiter - // {{{ - // Assign replay AR to internal AX data type - always_comb begin - replay_ar = '0; - `AXI_SET_AR_STRUCT(replay_ar, replay_ar_i) - end - - rr_arb_tree #( - .NumIn (2), - .DataType (midend_ax_t), - .AxiVldRdy(1'b1), - .LockIn (1'b0), - .ExtPrio (1'b1) - ) u_replay_arbiter ( - .clk_i, - .rst_ni, - .flush_i(1'b0), - .rr_i ('1), - .req_i ({replay_ar_valid_i, ax_valid}), - .gnt_o ({replay_ar_ready_o, ax_ready}), - .data_i ({replay_ar, ax}), - .req_o (ax_valid_o), - .gnt_i (ax_ready_i), - .data_o (ax_o), - .idx_o (ax_is_replay_o) - ); - // }}} - -endmodule diff --git a/src/ccu/ace_ccu_cd_arbiter.sv b/src/ccu/ace_ccu_cd_arbiter.sv deleted file mode 100644 index 3e2bd22..0000000 --- a/src/ccu/ace_ccu_cd_arbiter.sv +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (c) 2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -module ace_ccu_cd_arbiter - import ace_pkg::*; - import ace_ccu_pkg::*; -#( - parameter ace_ccu_cfg_t CcuCfg = '0, - parameter type cd_t = logic -) ( - input logic clk_i, - input logic rst_ni, - - input logic [CcuCfg.u.SlvPorts-1:0] cd_valid_i, - output logic [CcuCfg.u.SlvPorts-1:0] cd_ready_o, - input cd_t [CcuCfg.u.SlvPorts-1:0] cd_i, - - input logic cd_sel_valid_i, - output logic cd_sel_ready_o, - input logic [CcuCfg.u.SlvPorts-1:0] cd_sel_bv_i, - - output logic cd_valid_o, - input logic cd_ready_i, - output cd_t cd_o -); - - logic [ CcuCfg.u.SlvPorts-1:0] cd_sel_fork_out_valid; - logic [ CcuCfg.u.SlvPorts-1:0] cd_sel_fork_out_ready; - logic [ CcuCfg.u.SlvPorts-1:0] cd_last; - logic [ CcuCfg.u.SlvPorts-1:0] cd_join_out_valid; - logic [ CcuCfg.u.SlvPorts-1:0] cd_join_out_ready; - logic [ CcuCfg.u.SlvPorts-1:0] cd_drop; - logic [ CcuCfg.u.SlvPorts-1:0] cd_filter_out_valid; - logic [ CcuCfg.u.SlvPorts-1:0] cd_filter_out_ready; - - logic [CcuCfg.SlvPortIdxWidth-1:0] cd_first_resp_d; - logic [CcuCfg.SlvPortIdxWidth-1:0] cd_first_resp_q; - logic [CcuCfg.SlvPortIdxWidth-1:0] cd_first_resp_idx; - logic cd_first_resp_empty; - logic cd_first_resp_valid_d; - logic cd_first_resp_valid_q; - - stream_fork_dynamic #( - .N_OUP(CcuCfg.u.SlvPorts) - ) u_cd_fork ( - .clk_i, - .rst_ni, - .valid_i (cd_sel_valid_i), - .ready_o (cd_sel_ready_o), - .sel_i (cd_sel_bv_i), - .sel_valid_i('1), - .sel_ready_o(), - .valid_o (cd_sel_fork_out_valid), - .ready_i (cd_sel_fork_out_ready & cd_last) - ); - - for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin : gen_cd_filter - - assign cd_last[i] = cd_i[i].last; - - // Only selected channels can advance - stream_join #( - .N_INP(2) - ) u_cd_join ( - .inp_valid_i({cd_sel_fork_out_valid[i], cd_valid_i[i]}), - .inp_ready_o({cd_sel_fork_out_ready[i], cd_ready_o[i]}), - .oup_valid_o(cd_join_out_valid[i]), - .oup_ready_i(cd_join_out_ready[i]) - ); - - // Drop non first responder - stream_filter u_cd_filter ( - .valid_i(cd_join_out_valid[i]), - 
.ready_o(cd_join_out_ready[i]), - .drop_i (cd_drop[i]), - .valid_o(cd_filter_out_valid[i]), - .ready_i(cd_filter_out_ready[i]) - ); - - end - - // Select the first responder among the CD channels - stream_mux #( - .N_INP (CcuCfg.u.SlvPorts), - .DATA_T(cd_t) - ) u_cd_mux ( - .inp_data_i (cd_i), - .inp_valid_i(cd_filter_out_valid), - .inp_ready_o(cd_filter_out_ready), - .inp_sel_i (cd_first_resp_d), - .oup_data_o (cd_o), - .oup_valid_o(cd_valid_o), - .oup_ready_i(cd_ready_i) - ); - - lzc #( - .WIDTH(CcuCfg.u.SlvPorts) - ) u_cd_lzc ( - .in_i (cd_join_out_valid), - .cnt_o (cd_first_resp_idx), - .empty_o(cd_first_resp_empty) - ); - - always_comb begin - cd_first_resp_valid_d = cd_first_resp_valid_q; - cd_first_resp_d = cd_first_resp_q; - - if (!cd_first_resp_valid_q && !cd_first_resp_empty) begin - // There is a valid response and the first responder - // has not been found yet - // ~> save the LZC index - cd_first_resp_d = cd_first_resp_idx; - // ~> mark the first responder as valid - cd_first_resp_valid_d = 1'b1; - end - - if (cd_sel_valid_i && cd_sel_ready_o) begin - // All CD channels have been processed - // ~> clean first responder valid - cd_first_resp_valid_d = 1'b0; - end - end - - // Drop all selected CD channels which responded after the first responder - assign cd_drop = cd_first_resp_valid_q ? 
~(CcuCfg.u.SlvPorts'(1) << cd_first_resp_q) : '0; - - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - cd_first_resp_q <= '0; - cd_first_resp_valid_q <= 1'b0; - end else begin - cd_first_resp_q <= cd_first_resp_d; - cd_first_resp_valid_q <= cd_first_resp_valid_d; - end - end - -endmodule diff --git a/src/ccu/ace_ccu_cd_ctrl.sv b/src/ccu/ace_ccu_cd_ctrl.sv deleted file mode 100644 index cdf5e9e..0000000 --- a/src/ccu/ace_ccu_cd_ctrl.sv +++ /dev/null @@ -1,262 +0,0 @@ -// Copyright (c) 2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -`include "ace/assign.svh" - -module ace_ccu_cd_ctrl - import ace_pkg::*; - import ace_ccu_pkg::*; -#( - parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type midend_ax_t = logic, - parameter type midend_id_t = logic, - parameter type user_t = logic, - parameter type cd_t = logic, - parameter type slv_bv_t = logic, - parameter type w_t = logic, - parameter type midend_r_t = logic -) ( - - input logic clk_i, - input logic rst_ni, - - // Ctrl - input logic valid_i, - output logic ready_o, - input midend_ax_t ax_i, - input logic cd_ctrl_write_i, - input logic cd_ctrl_read_i, - input slv_bv_t cd_bv_i, - input logic r_resp_shared_i, - input logic r_resp_dirty_i, - - // CD snoop channel - input cd_t [CcuCfg.u.SlvPorts-1:0] cd_i, - input logic [CcuCfg.u.SlvPorts-1:0] cd_valid_i, - output logic [CcuCfg.u.SlvPorts-1:0] cd_ready_o, - - // Mst interface - output w_t w_o, - output logic w_valid_o, - input logic w_ready_i, - - // Slv interface - output midend_r_t r_o, - output logic r_valid_o, - input logic r_ready_i -); - // Typedefs - // {{{ - typedef logic [CcuCfg.CachelineAxiTransfersIdxWidth-1:0] cl_axi_trans_idx_t; - - typedef struct packed { - midend_id_t id; - logic cd_ctrl_write; - logic cd_ctrl_read; - cl_axi_trans_idx_t r_cd_start_trans; - logic r_resp_shared; - logic r_resp_dirty; - user_t r_user; - slv_bv_t cd_bv; - axi_pkg::len_t r_len; - } cd_ctrl_sync_reg_t; - // }}} - - // Internal signals - // {{{ - cl_axi_trans_idx_t r_cd_start_trans; - cd_ctrl_sync_reg_t cd_ctrl_sync_wdata; - cd_ctrl_sync_reg_t cd_ctrl_sync_rdata; - logic cd_ctrl_sync_valid; - logic cd_ctrl_sync_ready; - logic cd_valid; - logic cd_ready; - cd_t cd; - logic r_drop; - logic r_done_q; - logic r_done_d; - logic cd_trans_cnt_clr; - logic cd_trans_cnt_en; - cl_axi_trans_idx_t cd_trans_cnt; - logic r_len_cnt_clr; - logic r_len_cnt_en; - axi_pkg::len_t r_len_cnt; - rresp_t r_resp; - // }}} - - // Input handshake decoupling - // {{{ - if (CcuCfg.CachelineAxiTransfers == 1) begin : 
gen_axi_start_trans_eqsize - assign r_cd_start_trans = '0; - end else begin : gen_axi_start_trans_diffsize - assign r_cd_start_trans = - ax_i.addr[CcuCfg.CachelineBytesIdxWidth-1:CcuCfg.AxiDataBytesIdxWidth]; - end - - assign cd_ctrl_sync_wdata = '{ - id: ax_i.id, - cd_ctrl_write: cd_ctrl_write_i, - cd_ctrl_read: cd_ctrl_read_i, - cd_bv: cd_bv_i, - r_cd_start_trans: r_cd_start_trans, - r_resp_shared: r_resp_shared_i, - r_resp_dirty: r_resp_dirty_i, - r_user: ax_i.user, - r_len: ax_i.len - }; - - fall_through_register #( - .T(cd_ctrl_sync_reg_t) - ) u_cd_ctrl_sync_reg ( - .clk_i, - .rst_ni, - .clr_i (1'b0), - .testmode_i(1'b0), - .data_i (cd_ctrl_sync_wdata), - .valid_i (valid_i), - .ready_o (ready_o), - .data_o (cd_ctrl_sync_rdata), - .valid_o (cd_ctrl_sync_valid), - .ready_i (cd_ctrl_sync_ready) - ); - // }}} - - // CD responses arbiter - // {{{ - ace_ccu_cd_arbiter #( - .CcuCfg(CcuCfg), - .cd_t (cd_t) - ) u_cd_merge ( - .clk_i, - .rst_ni, - .cd_valid_i (cd_valid_i), - .cd_ready_o (cd_ready_o), - .cd_i (cd_i), - .cd_sel_valid_i(cd_ctrl_sync_valid), - .cd_sel_ready_o(cd_ctrl_sync_ready), - .cd_sel_bv_i (cd_ctrl_sync_rdata.cd_bv), - .cd_valid_o (cd_valid), - .cd_ready_i (cd_ready), - .cd_o (cd) - ); - // }}} - - // CD forking - // {{{ - assign cd_sel_write = cd_ctrl_sync_rdata.cd_ctrl_write; - - assign cd_sel_read = cd_ctrl_sync_rdata.cd_ctrl_read && ~|{ - // Drop the first transfers if not needed - r_drop, - // Drop remaining transfers due to reduced transfer len - r_done_q}; - - stream_fork_dynamic #( - .N_OUP(2) - ) u_cd_fork ( - .clk_i, - .rst_ni, - .valid_i (cd_valid), - .ready_o (cd_ready), - .sel_i ({cd_sel_write, cd_sel_read}), - .sel_valid_i('1), - .sel_ready_o(), - .valid_o ({w_valid_o, r_valid_o}), - .ready_i ({w_ready_i, r_ready_i}) - ); - // }}} - - // R channel - // {{{ - counter #( - .WIDTH(CcuCfg.CachelineAxiTransfersIdxWidth) - ) u_cd_trans_counter ( - .clk_i, - .rst_ni, - .clear_i (cd_trans_cnt_clr), - .en_i (cd_trans_cnt_en), - .load_i (1'b0), - 
.down_i (1'b0), - .d_i ('0), - .q_o (cd_trans_cnt), - .overflow_o() - ); - - counter #( - .WIDTH(axi_pkg::LenWidth) - ) u_r_len_counter ( - .clk_i, - .rst_ni, - .clear_i (r_len_cnt_clr), - .en_i (r_len_cnt_en), - .load_i ('0), - .down_i ('0), - .d_i ('0), - .q_o (r_len_cnt), - .overflow_o() - ); - - assign r_drop = cd_trans_cnt != cd_ctrl_sync_rdata.r_cd_start_trans; - assign cd_trans_cnt_en = cd_valid && cd_ready && r_drop; - assign cd_trans_cnt_clr = cd_valid && cd_ready && cd.last; - - assign r_last = r_len_cnt == cd_ctrl_sync_rdata.r_len; - assign r_len_cnt_en = r_valid_o && r_ready_i; - assign r_len_cnt_clr = cd_valid && cd_ready && cd.last; - - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) r_done_q <= 1'b0; - else r_done_q <= r_done_d; - end - - assign r_done_d = !r_len_cnt_clr && ((r_last && r_len_cnt_en) || r_done_q); - - always_comb begin : rresp_comb - r_resp = '0; - r_resp[RESP_IS_DIRTY] = cd_ctrl_sync_rdata.r_resp_dirty; - r_resp[RESP_IS_SHARED] = cd_ctrl_sync_rdata.r_resp_shared; - end - - assign r_o = '{ - id: cd_ctrl_sync_rdata.id, - data: cd.data, - resp: r_resp, - last: r_last, - user: cd_ctrl_sync_rdata.r_user - }; - // }}} - - // W channel - // {{{ - assign w_o = '{data: cd.data, strb: '1, last: cd.last, user: '0}; - // }}} - - // Assertions - // {{{ - - // If r_done_q is high, r_valid_o should never be raised - assert property (@(posedge clk_i) disable iff (!rst_ni) r_done_q |-> !r_valid_o); - // If r_drop is true, r_valid_o should never be raised - assert property (@(posedge clk_i) disable iff (!rst_ni) r_drop |-> !r_valid_o); - // If r_last is true, r_o.last should be raised - assert property (@(posedge clk_i) disable iff (!rst_ni) r_last |-> r_o.last); - // r_o.last can only be high if r_last is high - assert property (@(posedge clk_i) disable iff (!rst_ni) r_valid_o && r_o.last |-> r_last); - // r_valid_o should not be raised if not in read mode - assert property (@(posedge clk_i) disable iff (!rst_ni) 
!cd_ctrl_sync_rdata.cd_ctrl_read |-> !r_valid_o); - // r_valid_o should not be raised if cd_sel_read is not asserted - assert property (@(posedge clk_i) disable iff (!rst_ni) !cd_sel_read |-> !r_valid_o); - // w_valid_o should not be raised if cd_sel_write is not asserted - assert property (@(posedge clk_i) disable iff (!rst_ni) !cd_sel_write |-> !w_valid_o); - - // }}} - -endmodule diff --git a/src/ccu/ace_ccu_frontend.sv b/src/ccu/ace_ccu_frontend.sv deleted file mode 100644 index ceaf20f..0000000 --- a/src/ccu/ace_ccu_frontend.sv +++ /dev/null @@ -1,263 +0,0 @@ -// Copyright (c) 2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -`include "ace/typedef.svh" -`include "ace/assign.svh" - -module ace_ccu_frontend - import ace_pkg::*; - import ace_ccu_pkg::*; -#( - parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type slv_bv_t = logic, - parameter type slv_idx_t = logic, - parameter type slv_aw_t = logic, - parameter type w_t = logic, - parameter type slv_b_t = logic, - parameter type slv_ar_t = logic, - parameter type slv_r_t = logic, - parameter type slv_req_t = logic, - parameter type slv_resp_t = logic, - parameter type midend_aw_t = logic, - parameter type midend_b_t = logic, - parameter type midend_ar_t = logic, - parameter type midend_r_t = logic, - parameter type midend_req_t = logic, - parameter type midend_resp_t = logic -) ( - input logic clk_i, - input logic rst_ni, - - input slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_req_i, - output slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_resp_o, - - output midend_req_t ccu_nonshareable_req_o, - input midend_resp_t ccu_nonshareable_resp_i, - output midend_req_t ccu_shareable_req_o, - input midend_resp_t ccu_shareable_resp_i -); - - // Internal signals - // {{{ - slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_req_cut; - slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_resp_cut; - - slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_nonshareable_req; - slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_nonshareable_resp; - slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_shareable_req; - slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_shareable_resp; - - midend_req_t mux_shareable_req; - midend_resp_t mux_shareable_resp; - // }}} - - // Slv demuxes - // {{{ - - // Demux slv traffic into blocking and non-blocking traffic - // Non-blocking traffic is expected to proceed even when the snoop - // interface is stalling - - for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin : gen_slv_demux - - logic aw_is_nonblocking; - logic ar_is_read_no_snoop; - - ace_cut #( - .BypassAw (!CcuCfg.u.CutSlvReq), - .BypassW (!CcuCfg.u.CutSlvReq), - .BypassB (!CcuCfg.u.CutSlvResp), - .BypassAr (!CcuCfg.u.CutSlvReq), - .BypassR 
(!CcuCfg.u.CutSlvResp), - .BypassAck (1'b1), - .aw_chan_t (slv_aw_t), - .w_chan_t (w_t), - .b_chan_t (slv_b_t), - .ar_chan_t (slv_ar_t), - .r_chan_t (slv_r_t), - .ace_req_t (slv_req_t), - .ace_resp_t(slv_resp_t) - ) u_ace_cut ( - .clk_i, - .rst_ni, - .slv_req_i (slv_req_i[i]), - .slv_resp_o(slv_resp_o[i]), - .mst_req_o (slv_req_cut[i]), - .mst_resp_i(slv_resp_cut[i]) - ); - - // Separate in each port blocking and non-blocking traffic - assign aw_is_nonblocking = ace_aw_is_non_blocking( - slv_req_cut[i].aw.bar[0], slv_req_cut[i].aw.domain, slv_req_cut[i].aw.snoop - ); - - assign ar_is_read_no_snoop = ace_is_read_no_snoop( - slv_req_cut[i].ar.bar[0], slv_req_cut[i].ar.domain, slv_req_cut[i].ar.snoop - ); - - ace_demux_simple #( - .AxiIdWidth (CcuCfg.u.AxiSlvIdWidth), - .AtopSupport(1'b1), - .aw_chan_t (slv_aw_t), - .w_chan_t (w_t), - .ar_chan_t (slv_ar_t), - .req_t (slv_req_t), - .resp_t (slv_resp_t), - .NoMstPorts (2), - .MaxTrans (CcuCfg.u.MaxTransactions), - .AxiLookBits(CcuCfg.u.AxiIdLookupBits), - .UniqueIds (CcuCfg.u.AxiUniqueIds) - ) u_ace_demux ( - .clk_i, - .rst_ni, - .test_i (1'b0), - .slv_req_i (slv_req_cut[i]), - .slv_resp_o (slv_resp_cut[i]), - .slv_aw_select_i(aw_is_nonblocking), - .slv_ar_select_i(ar_is_read_no_snoop), - .mst_reqs_o ({slv_nonshareable_req[i], slv_shareable_req[i]}), - .mst_resps_i ({slv_nonshareable_resp[i], slv_shareable_resp[i]}) - ); - end - // }}} - - // Nonshareable mux - // {{{ - ace_mux #( - .SlvAxiIDWidth(CcuCfg.u.AxiSlvIdWidth), - .slv_aw_chan_t(slv_aw_t), - .mst_aw_chan_t(midend_aw_t), - .w_chan_t (w_t), - .slv_b_chan_t (slv_b_t), - .mst_b_chan_t (midend_b_t), - .slv_ar_chan_t(slv_ar_t), - .mst_ar_chan_t(midend_ar_t), - .slv_r_chan_t (slv_r_t), - .mst_r_chan_t (midend_r_t), - .slv_req_t (slv_req_t), - .slv_resp_t (slv_resp_t), - .mst_req_t (midend_req_t), - .mst_resp_t (midend_resp_t), - .NoSlvPorts (CcuCfg.u.SlvPorts), - .MaxWTrans (32'd8), - .MaxBTrans (CcuCfg.u.MaxTransactions), - .MaxRTrans (CcuCfg.u.MaxTransactions), - 
.FallThrough (1'b1), - .SpillAw (1'b0), - .SpillW (1'b0), - .SpillB (1'b0), - .SpillAr (1'b0), - .SpillR (1'b0) - ) u_ace_nonshareable_mux ( - .clk_i, - .rst_ni, - .test_i (1'b0), - .slv_reqs_i (slv_nonshareable_req), - .slv_resps_o(slv_nonshareable_resp), - .mst_req_o (ccu_nonshareable_req_o), - .mst_resp_i (ccu_nonshareable_resp_i) - ); - // }}} - - // Nonshareable demux - // {{{ - ace_mux #( - .SlvAxiIDWidth(CcuCfg.u.AxiSlvIdWidth), - .slv_aw_chan_t(slv_aw_t), - .mst_aw_chan_t(midend_aw_t), - .w_chan_t (w_t), - .slv_b_chan_t (slv_b_t), - .mst_b_chan_t (midend_b_t), - .slv_ar_chan_t(slv_ar_t), - .mst_ar_chan_t(midend_ar_t), - .slv_r_chan_t (slv_r_t), - .mst_r_chan_t (midend_r_t), - .slv_req_t (slv_req_t), - .slv_resp_t (slv_resp_t), - .mst_req_t (midend_req_t), - .mst_resp_t (midend_resp_t), - .NoSlvPorts (CcuCfg.u.SlvPorts), - .MaxWTrans (32'd8), - .MaxBTrans (CcuCfg.u.MaxTransactions), - .MaxRTrans (CcuCfg.u.MaxTransactions), - .FallThrough (1'b1), - .SpillAw (1'b0), - .SpillW (1'b0), - .SpillB (1'b0), - .SpillAr (1'b0), - .SpillR (1'b0) - ) u_ace_shareable_mux ( - .clk_i, - .rst_ni, - .test_i (1'b0), - .slv_reqs_i (slv_shareable_req), - .slv_resps_o(slv_shareable_resp), - .mst_req_o (mux_shareable_req), - .mst_resp_i (mux_shareable_resp) - ); - // }}} - - // AMO LR/SC monitor - // {{{ - - localparam longint unsigned ADDR_BEGIN = '0; - localparam longint unsigned ADDR_END = {CcuCfg.u.AxiAddrWidth{1'b1}}; - - // All subsequent ACE defines will not use RACK/WACKS - `define __ACE_NO_ACKS - - // Internal request type without acks - `ACE_TYPEDEF_REQ_T(__midend_req_t, midend_aw_t, w_t, midend_ar_t) - - __midend_req_t __mux_shareable_req; - __midend_req_t __ccu_shareable_req; - - `ACE_ASSIGN_REQ_STRUCT(__mux_shareable_req, mux_shareable_req) - `ACE_ASSIGN_REQ_STRUCT(ccu_shareable_req_o, __ccu_shareable_req) - - // xACK bypass - assign ccu_shareable_req_o.wack = mux_shareable_req.wack; - assign ccu_shareable_req_o.rack = mux_shareable_req.rack; - - `undef __ACE_NO_ACKS 
- - axi_riscv_lrsc_structs #( - .ADDR_BEGIN (ADDR_BEGIN), - .ADDR_END (ADDR_END), - .AXI_ADDR_WIDTH (CcuCfg.u.AxiAddrWidth), - .AXI_DATA_WIDTH (CcuCfg.u.AxiDataWidth), - .AXI_ID_WIDTH (CcuCfg.AxiMidendIdWidth), - .AXI_USER_WIDTH (CcuCfg.u.AxiUserWidth), - .AXI_MAX_READ_TXNS (CcuCfg.u.MaxTransactions), - .AXI_MAX_WRITE_TXNS (CcuCfg.u.MaxTransactions), - .AXI_USER_AS_ID (CcuCfg.u.AmoAxiUserAsId), - .AXI_USER_ID_MSB (CcuCfg.u.AmoAxiUserIdMsb), - .AXI_USER_ID_LSB (CcuCfg.u.AmoAxiUserIdLsb), - .AXI_ADDR_LSB (CcuCfg.u.AmoAxiAddrLsb), - .FULL_BANDWIDTH (1), - .CUT_OUP_POP_INP_GNT(0), - .NUM_RESERVATIONS (CcuCfg.u.AmoNumReservations), - .aw_chan_t (midend_aw_t), - .b_chan_t (midend_b_t), - .r_chan_t (midend_r_t), - .req_t (__midend_req_t), - .resp_t (midend_resp_t) - ) u_axi_riscv_lrsc ( - .clk_i, - .rst_ni, - .slv_req_i (__mux_shareable_req), - .slv_resp_o(mux_shareable_resp), - .mst_req_o (__ccu_shareable_req), - .mst_resp_i(ccu_shareable_resp_i) - ); - // }}} - -endmodule diff --git a/src/ccu/ace_ccu_pkg.sv b/src/ccu/ace_ccu_pkg.sv deleted file mode 100644 index c9da615..0000000 --- a/src/ccu/ace_ccu_pkg.sv +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) 2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -package ace_ccu_pkg; - - typedef struct packed { - // Number of slv ports - int unsigned SlvPorts; - // Maximum blocking inflight transactions - int unsigned MaxTransactions; - // Shareable W channel buffer size - int unsigned ShareableWFifoDepth; - // Instantiate replay table - bit ReplayEn; - // Address bits to be used during conflict checking - int unsigned NLineWidth; - // AXI/ACE parameters - int unsigned AxiAddrWidth; - int unsigned AxiDataWidth; - int unsigned AxiUserWidth; - int unsigned AxiSlvIdWidth; - // Unique IDs are passed to frontend demux - bit AxiUniqueIds; - // Lookup bits in frontend demux - int unsigned AxiIdLookupBits; - // Cache parameters - int unsigned CachelineWidth; - // I/O cuts - bit CutSlvReq; - bit CutSlvResp; - bit CutMstReq; - bit CutMstResp; - bit CutSnoopReq; - bit CutSnoopResp; - // LR/SC reservation buffer - // Use the AXI User signal instead of the AXI ID to track reservations - bit AmoAxiUserAsId; - // MSB of the ID in the user signal - int unsigned AmoAxiUserIdMsb; - // LSB of the ID in the user signal - int unsigned AmoAxiUserIdLsb; - // log2 of granularity for reservations (ignored LSBs) - int unsigned AmoAxiAddrLsb; - // Number of simultaineous reservations - int unsigned AmoNumReservations; - } ace_ccu_user_cfg_t; - - typedef struct packed { - // User parameters - ace_ccu_user_cfg_t u; - // Derived parameters - int unsigned SlvPortIdxWidth; - int unsigned TransactionIdxWidth; - int unsigned CachelineBytes; - int unsigned CachelineBytesIdxWidth; - int unsigned CachelineAddrWidth; - int unsigned CachelineAxiTransfers; - int unsigned CachelineAxiTransfersIdxWidth; - int unsigned AxiMidendIdWidth; - int unsigned AxiBackendIdWidth; - int unsigned AxiDataBytes; - int unsigned AxiDataBytesIdxWidth; - int unsigned AxiStrbWidth; - int unsigned AxiMstIdWidth; - int unsigned ArIdCounters; - } ace_ccu_cfg_t; - - function automatic ace_ccu_cfg_t ace_ccu_build_cfg(ace_ccu_user_cfg_t u); - ace_ccu_cfg_t p; - - p.u = u; - - 
p.SlvPortIdxWidth = $clog2(u.SlvPorts); - p.TransactionIdxWidth = $clog2(u.MaxTransactions); - p.CachelineBytes = u.CachelineWidth / 8; - p.CachelineBytesIdxWidth = $clog2(p.CachelineBytes); - p.CachelineAddrWidth = u.AxiAddrWidth - p.CachelineBytesIdxWidth; - p.CachelineAxiTransfers = u.CachelineWidth / u.AxiDataWidth; - p.CachelineAxiTransfersIdxWidth = $clog2(p.CachelineAxiTransfers); - p.AxiDataBytes = u.AxiDataWidth / 8; - p.AxiDataBytesIdxWidth = $clog2(p.AxiDataBytes); - p.AxiStrbWidth = u.AxiDataWidth / 8; - p.AxiMidendIdWidth = u.AxiSlvIdWidth + p.SlvPortIdxWidth; - p.AxiBackendIdWidth = p.AxiMidendIdWidth + 1; - p.AxiMstIdWidth = p.AxiBackendIdWidth + 1; - - return p; - endfunction - -endpackage diff --git a/src/ccu/ace_ccu_read.sv b/src/ccu/ace_ccu_read.sv deleted file mode 100644 index d211208..0000000 --- a/src/ccu/ace_ccu_read.sv +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright (c) 2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -`include "axi/assign.svh" -`include "ace/convert.svh" - -module ace_ccu_read - import ace_pkg::*; - import ace_ccu_pkg::*; -#( - parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type midend_ax_t = logic, - parameter type tid_t = logic, - parameter type backend_ar_t = logic, - parameter type backend_r_t = logic, - parameter type midend_r_t = logic, - parameter type slv_idx_t = logic -) ( - input logic clk_i, - input logic rst_ni, - - // Ctrl - input logic valid_i, - output logic ready_o, - input midend_ax_t ax_i, - - // Snp interface - input midend_r_t cd_r_i, - input logic cd_r_valid_i, - output logic cd_r_ready_o, - - // Slv interface - output midend_r_t r_o, - output logic r_valid_o, - input logic r_ready_i, - // Mst interface - output backend_ar_t ar_o, - output logic ar_valid_o, - input logic ar_ready_i, - input backend_r_t r_i, - input logic r_valid_i, - output logic r_ready_o -); - - backend_ar_t ar_sync_wdata; - midend_r_t mem_r; - - // AR channel - // {{{ - `AXI_ASSIGN_AR_STRUCT(ar_sync_wdata, ax_i) - - fall_through_register #( - .T(backend_ar_t) - ) u_ar_sync_reg ( - .clk_i, - .rst_ni, - .clr_i (1'b0), - .testmode_i(1'b0), - .valid_i (valid_i), - .ready_o (ready_o), - .data_i (ar_sync_wdata), - .valid_o (ar_valid_o), - .ready_i (ar_ready_i), - .data_o (ar_o) - ); - // }}} - - // R channel - // {{{ - `AXI_TO_ACE_ASSIGN_R_STRUCT(mem_r, r_i) - - typedef struct packed { - logic mem; - logic cd; - } req_mask_t; - - req_mask_t [CcuCfg.u.SlvPorts-1:0] req_mask_q; - req_mask_t [CcuCfg.u.SlvPorts-1:0] req_mask_d; - req_mask_t req_mask; - - rr_arb_tree #( - .NumIn (2), - .DataType (midend_r_t), - .AxiVldRdy(1'b1), - .LockIn (1'b1) - ) u_r_arbiter ( - .clk_i, - .rst_ni, - .flush_i(1'b0), - .rr_i (1'b0), - .req_i ({r_valid_i, cd_r_valid_i} & ~req_mask), - .gnt_o ({r_ready_o, cd_r_ready_o}), - .data_i ({mem_r, cd_r_i}), - .req_o (r_valid_o), - .gnt_i (r_ready_i), - .data_o (r_o), - .idx_o (r_arb) - ); - - assign req_mask.mem = 
req_mask_q[mem_r.id>>CcuCfg.u.AxiSlvIdWidth].mem; - assign req_mask.cd = req_mask_q[cd_r_i.id>>CcuCfg.u.AxiSlvIdWidth].cd; - - for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin - slv_idx_t slv_idx; - logic read_mem_q; - logic read_mem_d; - logic read_busy_q; - logic read_busy_d; - - assign slv_idx = r_o.id >> CcuCfg.u.AxiSlvIdWidth; - - always_comb begin - read_busy_d = read_busy_q; - req_mask_d[i] = req_mask_q[i]; - - if (read_busy_q) begin - if (r_valid_o && r_ready_i && r_o.last) begin - read_busy_d = 1'b0; - req_mask_d[i] = '0; - end - end else if (slv_idx == i && r_valid_o && r_ready_i && !r_o.last) begin - read_busy_d = 1'b1; - req_mask_d[i].mem = ~r_arb; - req_mask_d[i].cd = r_arb; - end - end - - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - read_busy_q <= 1'b0; - req_mask_q[i] <= '0; - end else begin - read_busy_q <= read_busy_d; - req_mask_q[i] <= req_mask_d[i]; - end - end - end - // }}} - - - -endmodule diff --git a/src/ccu/ace_ccu_snoop_pipe.sv b/src/ccu/ace_ccu_snoop_pipe.sv deleted file mode 100644 index ab8dad8..0000000 --- a/src/ccu/ace_ccu_snoop_pipe.sv +++ /dev/null @@ -1,351 +0,0 @@ -// Copyright (c) 2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -module ace_ccu_snoop_pipe - import ace_pkg::*; - import ace_ccu_pkg::*; -#( - parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type domain_rule_t = logic, - parameter type midend_aw_t = logic, - parameter type midend_ar_t = logic, - parameter type midend_ax_t = logic, - parameter type midend_id_t = logic, - parameter type ac_t = logic, - parameter type cr_t = logic, - parameter type slv_bv_t = logic, - parameter type slv_idx_t = logic, - parameter type tid_t = logic, - parameter type nline_t = logic -) ( - input logic clk_i, - input logic rst_ni, - - input midend_aw_t st0_aw_i, - input logic st0_aw_valid_i, - output logic st0_aw_ready_o, - - input midend_ar_t st0_ar_i, - input logic st0_ar_valid_i, - output logic st0_ar_ready_o, - - input midend_ar_t st0_replay_ar_i, - input logic st0_replay_ar_valid_i, - output logic st0_replay_ar_ready_o, - - output ac_t st0_ac_o, - output slv_bv_t st0_ac_bv_o, - output logic st0_ac_valid_o, - input logic st0_ac_ready_i, - - input cr_t st1_cr_i, - output slv_bv_t st1_cr_bv_o, - input logic st1_cr_valid_i, - output logic st1_cr_ready_o, - - input logic st0_replay_full_i, - output logic st0_replay_check_o, - input logic st0_replay_hit_i, - output logic st0_replay_alloc_o, - - input logic st0_tracker_full_i, - output logic st0_tracker_check_o, - input logic st0_tracker_check_hit_i, - output logic st0_tracker_alloc_o, - output logic st0_tracker_alloc_b_o, - output logic st0_tracker_alloc_r_o, - output nline_t st0_tracker_alloc_nline_o, - output midend_id_t st0_tracker_alloc_id_o, - input tid_t st0_tracker_alloc_tid_i, - - input domain_rule_t [CcuCfg.u.SlvPorts-1:0] st0_domain_rule_i, - - output midend_ax_t st1_ax_o, - output logic st1_ax_is_write_o, - output logic st1_r_resp_shared_o, - output logic st1_r_resp_dirty_o, - output tid_t st1_ax_tid_o, - output logic st1_cd_ctrl_write_o, - output logic st1_cd_ctrl_read_o, - output logic st1_write_valid_o, - input logic st1_write_ready_i, - output logic st1_read_valid_o, - 
input logic st1_read_ready_i, - output logic st1_cd_ctrl_valid_o, - input logic st1_cd_ctrl_ready_i, - - output logic evt_st0_stall_o, - output logic evt_st1_stall_o -); - // Typedefs - // {{{ - typedef struct packed { - logic ax_is_write; - logic ar_accepts_dirty; - logic ar_accepts_dirty_shared; - logic ar_accepts_shared; - slv_bv_t cr_bv; - tid_t tid; - midend_ax_t ax; - } st1_t; - // }}} - - // Internal signals - // {{{ - midend_ax_t st0_ax; - logic st0_ax_valid; - logic st0_ax_ready; - logic st0_ax_is_write; - acsnoop_t st0_ax_acsnoop; - axdomain_t st0_ax_domain; - logic st0_ar_accepts_dirty; - logic st0_ar_accepts_dirty_shared; - logic st0_ar_accepts_shared; - logic st0_pipe_valid; - logic st0_pipe_ready; - logic st0_hazard; - logic st0_replay; - slv_idx_t st0_slv_idx; - st1_t st0_pipe; - - logic st1_pipe_valid; - logic st1_pipe_ready; - st1_t st1; - logic st1_valid; - logic st1_ready; - logic st1_r_resp_shared; - logic st1_r_resp_dirty; - logic st1_aw_sel; - logic st1_ar_sel; - logic st1_cd_sel; - logic st1_cd_write; - logic st1_cd_read; - // }}} - - // AX arbiter - // {{{ - ace_ccu_ax_arbiter #( - .CcuCfg (CcuCfg), - .midend_aw_t(midend_aw_t), - .midend_ar_t(midend_ar_t), - .midend_ax_t(midend_ax_t) - ) u_st0_ax_arbiter ( - .clk_i, - .rst_ni, - .replay_full_i (st0_replay_full_i), - .aw_i (st0_aw_i), - .aw_valid_i (st0_aw_valid_i), - .aw_ready_o (st0_aw_ready_o), - .ar_i (st0_ar_i), - .ar_valid_i (st0_ar_valid_i), - .ar_ready_o (st0_ar_ready_o), - .replay_ar_i (st0_replay_ar_i), - .replay_ar_valid_i (st0_replay_ar_valid_i), - .replay_ar_ready_o (st0_replay_ar_ready_o), - .ax_o (st0_ax), - .ax_valid_o (st0_ax_valid), - .ax_ready_i (st0_ax_ready), - .ax_is_write_o (st0_ax_is_write), - .ax_is_replay_o (st0_ax_is_replay), - .ax_acsnoop_o (st0_ax_acsnoop), - .ar_accepts_dirty_o (st0_ar_accepts_dirty), - .ar_accepts_dirty_shared_o(st0_ar_accepts_dirty_shared), - .ar_accepts_shared_o (st0_ar_accepts_shared), - .ax_domain_o (st0_ax_domain) - ); - // }}} - - // Stage 
0 - // {{{ - always_comb begin : hazard_comb - st0_hazard = 1'b1; - st0_replay = 1'b0; - - st0_tracker_check_o = 1'b0; - st0_replay_check_o = 1'b0; - - if (!st0_tracker_full_i && st0_pipe_ready) begin - // Check if there is any conflict on nline or ID (tracker) - st0_tracker_check_o = 1'b1; - if (st0_ax_is_write) begin - // The AX originates from AW - if (st0_tracker_check_hit_i) begin - // Writes are not replayable - // nline conflict cause head of line stalling - // Resolution of conflicts happens by draining - // the downstream buffers - end else begin - // The write is clear to go - st0_hazard = 1'b0; - end - end else begin - // The AX originates from AR - // Check also if there is any conflict on ID (replay) - st0_replay_check_o = !st0_ax_is_replay; - if (!st0_tracker_check_hit_i && !st0_replay_hit_i) begin - // No conflict is detected - st0_hazard = 1'b0; - end else if (CcuCfg.u.ReplayEn) begin - // Reads are replayable - // ID or nline conflict is avoided by putting the request on hold - st0_replay = 1'b1; - end - end - end - end - - // Handshaking logic - assign st0_ac_valid_o = st0_hazard ? 1'b0 : st0_ax_valid; - assign st0_ax_ready = st0_hazard ? 
st0_replay : st0_ac_ready_i; - // Allocations - assign st0_tracker_alloc_o = st0_ax_valid && st0_ax_ready && !st0_replay; - assign st0_tracker_alloc_b_o = st0_ax_is_write; - assign st0_tracker_alloc_r_o = !st0_ax_is_write || st0_ax.atop[axi_pkg::ATOP_R_RESP]; - assign st0_tracker_alloc_nline_o = st0_ax.addr[CcuCfg.CachelineBytesIdxWidth+:CcuCfg.u.NLineWidth]; - assign st0_tracker_alloc_id_o = st0_ax.id; - assign st0_replay_alloc_o = st0_ax_valid && st0_ax_ready && st0_replay; - - assign st0_pipe_valid = st0_ac_valid_o && st0_ac_ready_i; - - assign st0_slv_idx = st0_ax.id[CcuCfg.AxiMidendIdWidth-1 : CcuCfg.u.AxiSlvIdWidth]; - - always_comb begin : ac_bv_comb - unique case (st0_ax_domain) - InnerShareable: st0_ac_bv_o = st0_domain_rule_i[st0_slv_idx].inner; - OuterShareable: st0_ac_bv_o = st0_domain_rule_i[st0_slv_idx].outer; - System: st0_ac_bv_o = ~st0_domain_rule_i[st0_slv_idx].initiator; - default: st0_ac_bv_o = '0; - endcase - end - - assign st0_ac_o = '{ - addr: axi_pkg::aligned_addr(st0_ax.addr, CcuCfg.CachelineBytesIdxWidth), - snoop: st0_ax_acsnoop, - prot: '0 - }; - - assign st0_pipe = '{ - ax_is_write: st0_ax_is_write, - ar_accepts_dirty: st0_ar_accepts_dirty, - ar_accepts_dirty_shared: st0_ar_accepts_dirty_shared, - ar_accepts_shared: st0_ar_accepts_shared, - cr_bv: st0_ac_bv_o, - tid: st0_tracker_alloc_tid_i, - ax: st0_ax - }; - // }}} - - // Stage 1 - // {{{ - spill_register #( - .T (st1_t), - .Bypass(1'b0) - ) u_st1_pipe_reg ( - .clk_i, - .rst_ni, - .valid_i(st0_pipe_valid), - .ready_o(st0_pipe_ready), - .data_i (st0_pipe), - .valid_o(st1_valid), - .ready_i(st1_ready), - .data_o (st1) - ); - - stream_join #( - .N_INP(2) - ) u_st1_join ( - .inp_valid_i({st1_valid, st1_cr_valid_i}), - .inp_ready_o({st1_ready, st1_cr_ready_o}), - .oup_valid_o(st1_pipe_valid), - .oup_ready_i(st1_pipe_ready) - ); - - assign st1_cr_bv_o = st1.cr_bv; - - assign st1_r_resp_shared = st1_cr_i.IsShared; - assign st1_r_resp_dirty = st1_cr_i.PassDirty && st1.ar_accepts_dirty; - - 
always_comb begin - st1_aw_sel = 1'b0; - st1_ar_sel = 1'b0; - st1_cd_sel = 1'b0; - - st1_cd_write = 1'b0; - st1_cd_read = 1'b0; - - if (st1.ax_is_write) begin - // The transactions is a shareable write - st1_aw_sel = 1'b1; - if (st1_cr_i.DataTransfer) begin - // A writeback is expected - st1_cd_sel = 1'b1; - // If dirty data is passed, do a writeback - // otherwise CD data will be dropped - st1_cd_write = st1_cr_i.PassDirty; - end - end else begin - // The transactions is a shareable read - if (st1_cr_i.DataTransfer) begin - // A cacheline is expected on CD - st1_cd_sel = 1'b1; - st1_cd_read = 1'b1; - if (st1_cr_i.PassDirty && !st1.ar_accepts_dirty) begin - // The cacheline is dirty but the initiator cannot accept it - st1_aw_sel = 1'b1; - st1_cd_write = 1'b1; - end - end else begin - // The cacheline must be obtained from memory - st1_ar_sel = 1'b1; - end - end - end - - stream_fork_dynamic #( - .N_OUP(3) - ) u_st1_fork ( - .clk_i, - .rst_ni, - .valid_i (st1_pipe_valid), - .ready_o (st1_pipe_ready), - .sel_i ({st1_aw_sel, st1_ar_sel, st1_cd_sel}), - .sel_valid_i('1), - .sel_ready_o(), - .valid_o ({st1_write_valid_o, st1_read_valid_o, st1_cd_ctrl_valid_o}), - .ready_i ({st1_write_ready_i, st1_read_ready_i, st1_cd_ctrl_ready_i}) - ); - // }}} - - // Pipe outputs - // {{{ - assign st1_ax_o = st1.ax; - assign st1_ax_is_write_o = st1.ax_is_write; - assign st1_r_resp_shared_o = st1_r_resp_shared; - assign st1_r_resp_dirty_o = st1_r_resp_dirty; - assign st1_cd_ctrl_write_o = st1_cd_write; - assign st1_cd_ctrl_read_o = st1_cd_read; - assign st1_ax_tid_o = st1.tid; - // }}} - - // Performance events - // {{{ - assign evt_st0_stall_o = st0_ax_valid && !st0_ax_ready; - assign evt_st1_stall_o = st1_pipe_valid && !st1_pipe_ready; - // }}} - - // Assertions - // {{{ - - // initiator bit in st0_ac_bv_o should never be set to 1 - assert property (@(posedge clk_i) disable iff (!rst_ni) st0_ac_bv_o[st0_slv_idx] == 1'b0); - // }}} - -endmodule diff --git a/src/ccu/ace_ccu_top.sv 
b/src/ccu/ace_ccu_top.sv deleted file mode 100644 index 8d09bb5..0000000 --- a/src/ccu/ace_ccu_top.sv +++ /dev/null @@ -1,710 +0,0 @@ -// Copyright (c) 2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -`include "axi/assign.svh" -`include "axi/typedef.svh" -`include "ace/assign.svh" -`include "ace/typedef.svh" -`include "ace/convert.svh" -`include "ace/domain.svh" - -module ace_ccu_top - import ace_pkg::*; - import ace_ccu_pkg::*; -#( - parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type domain_rule_t = logic, - parameter type slv_ar_t = logic, - parameter type slv_aw_t = logic, - parameter type w_t = logic, - parameter type slv_b_t = logic, - parameter type slv_r_t = logic, - parameter type slv_req_t = logic, - parameter type slv_resp_t = logic, - parameter type mst_ar_t = logic, - parameter type mst_aw_t = logic, - parameter type mst_b_t = logic, - parameter type mst_r_t = logic, - parameter type mst_req_t = logic, - parameter type mst_resp_t = logic, - parameter type snoop_ac_t = logic, - parameter type snoop_cr_t = logic, - parameter type snoop_cd_t = logic, - parameter type snoop_req_t = logic, - parameter type snoop_resp_t = logic -) ( - input logic clk_i, - input logic rst_ni, - - input slv_req_t [CcuCfg.u.SlvPorts-1:0] slv_req_i, - output slv_resp_t [CcuCfg.u.SlvPorts-1:0] slv_resp_o, - - input domain_rule_t [CcuCfg.u.SlvPorts-1:0] 
domain_rule_i, - - output snoop_req_t [CcuCfg.u.SlvPorts-1:0] snoop_req_o, - input snoop_resp_t [CcuCfg.u.SlvPorts-1:0] snoop_resp_i, - - output mst_req_t mst_req_o, - input mst_resp_t mst_resp_i -); - - // Typdefs - // {{{ - - // AXI/ACE types - typedef logic [CcuCfg.u.AxiSlvIdWidth-1:0] slv_id_t; - typedef logic [CcuCfg.AxiMidendIdWidth-1:0] midend_id_t; - typedef logic [CcuCfg.AxiBackendIdWidth-1:0] backend_id_t; - typedef logic [CcuCfg.AxiMstIdWidth-1:0] mst_id_t; - typedef logic [CcuCfg.u.AxiAddrWidth-1:0] addr_t; - typedef logic [CcuCfg.u.AxiDataWidth-1:0] data_t; - typedef logic [CcuCfg.AxiStrbWidth-1:0] strb_t; - typedef logic [CcuCfg.u.AxiUserWidth-1:0] user_t; - - // Intermediate ACE and AXI channel types - `ACE_TYPEDEF_AW_CHAN_T(midend_aw_t, addr_t, midend_id_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(midend_b_t, midend_id_t, user_t) - `ACE_TYPEDEF_AR_CHAN_T(midend_ar_t, addr_t, midend_id_t, user_t) - `ACE_TYPEDEF_R_CHAN_T(midend_r_t, data_t, midend_id_t, user_t) - `ACE_TYPEDEF_REQ_T(midend_req_t, midend_aw_t, w_t, midend_ar_t) - `ACE_TYPEDEF_RESP_T(midend_resp_t, midend_b_t, midend_r_t) - - `AXI_TYPEDEF_AW_CHAN_T(backend_aw_t, addr_t, backend_id_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(backend_b_t, backend_id_t, user_t) - `AXI_TYPEDEF_AR_CHAN_T(backend_ar_t, addr_t, backend_id_t, user_t) - `AXI_TYPEDEF_R_CHAN_T(backend_r_t, data_t, backend_id_t, user_t) - `AXI_TYPEDEF_REQ_T(backend_req_t, backend_aw_t, w_t, backend_ar_t) - `AXI_TYPEDEF_RESP_T(backend_resp_t, backend_b_t, backend_r_t) - - // Transaction ID type - typedef logic [CcuCfg.TransactionIdxWidth-1:0] tid_t; - typedef logic [CcuCfg.u.SlvPorts-1:0] slv_bv_t; - typedef logic [CcuCfg.SlvPortIdxWidth-1:0] slv_idx_t; - typedef logic [CcuCfg.u.NLineWidth-1:0] nline_t; - - // Internal AW/AR request unified representation - typedef struct packed { - midend_id_t id; - addr_t addr; - axi_pkg::len_t len; - axi_pkg::size_t size; - axi_pkg::burst_t burst; - logic lock; - axi_pkg::cache_t cache; - axi_pkg::prot_t prot; - 
axi_pkg::qos_t qos; - axi_pkg::region_t region; - axi_pkg::atop_t atop; - user_t user; - } midend_ax_t; - // }}} - - // Internal signals - // {{{ - midend_req_t ccu_nonshareable_req; - midend_resp_t ccu_nonshareable_resp; - midend_req_t ccu_shareable_req; - midend_resp_t ccu_shareable_resp; - - midend_ar_t replay_ar; - logic replay_ar_valid; - logic replay_ar_ready; - - snoop_ac_t ac; - slv_bv_t ac_bv; - logic ac_valid; - logic ac_ready; - snoop_cr_t cr; - slv_bv_t cr_bv; - logic cr_valid; - logic cr_ready; - slv_bv_t snoop_ac_valid; - slv_bv_t snoop_ac_ready; - snoop_ac_t [CcuCfg.u.SlvPorts-1:0] snoop_ac; - slv_bv_t snoop_cr_valid; - slv_bv_t snoop_cr_ready; - snoop_cr_t [CcuCfg.u.SlvPorts-1:0] snoop_cr; - slv_bv_t snoop_cd_valid; - slv_bv_t snoop_cd_ready; - snoop_cd_t [CcuCfg.u.SlvPorts-1:0] snoop_cd; - - logic replay_full; - logic replay_check; - logic replay_hit; - logic replay_alloc; - - logic tracker_full; - logic tracker_check; - logic tracker_check_hit; - logic tracker_alloc; - logic tracker_alloc_b; - logic tracker_alloc_r; - nline_t tracker_alloc_nline; - midend_id_t tracker_alloc_id; - tid_t tracker_alloc_tid; - logic tracker_dealloc_r_resp; - logic tracker_dealloc_b_resp; - logic tracker_dealloc_check_b_resp; - midend_id_t tracker_dealloc_r_resp_id; - midend_id_t tracker_dealloc_b_resp_id; - - midend_ax_t pipe_ax; - logic pipe_ax_is_write; - logic pipe_r_resp_shared; - logic pipe_r_resp_dirty; - slv_bv_t pipe_cd_bv; - tid_t pipe_ax_tid; - logic pipe_cd_ctrl_write; - logic pipe_cd_ctrl_read; - logic write_valid; - logic write_ready; - logic read_valid; - logic read_ready; - logic cd_ctrl_valid; - logic cd_ctrl_read; - - w_t write_w; - logic write_w_valid; - logic write_w_ready; - - w_t cd_w; - logic cd_w_valid; - logic cd_w_ready; - midend_r_t cd_r; - logic cd_r_valid; - logic cd_r_ready; - - backend_req_t axi_shareable_req; - backend_resp_t axi_shareable_resp; - backend_req_t axi_nonshareable_req; - backend_resp_t axi_nonshareable_resp; - - mst_req_t 
mst_req; - mst_resp_t mst_resp; - - // }}} - - // Frontend - // {{{ - ace_ccu_frontend #( - .CcuCfg (CcuCfg), - .slv_bv_t (slv_bv_t), - .slv_idx_t (slv_idx_t), - .slv_aw_t (slv_aw_t), - .w_t (w_t), - .slv_b_t (slv_b_t), - .slv_ar_t (slv_ar_t), - .slv_r_t (slv_r_t), - .slv_req_t (slv_req_t), - .slv_resp_t (slv_resp_t), - .midend_aw_t (midend_aw_t), - .midend_b_t (midend_b_t), - .midend_ar_t (midend_ar_t), - .midend_r_t (midend_r_t), - .midend_req_t (midend_req_t), - .midend_resp_t(midend_resp_t) - ) u_ace_ccu_frontend ( - .clk_i, - .rst_ni, - .slv_req_i, - .slv_resp_o, - .ccu_nonshareable_req_o (ccu_nonshareable_req), - .ccu_nonshareable_resp_i(ccu_nonshareable_resp), - .ccu_shareable_req_o (ccu_shareable_req), - .ccu_shareable_resp_i (ccu_shareable_resp) - ); - // }}} - - // Snoop pipeline - // {{{ - ace_ccu_snoop_pipe #( - .CcuCfg (CcuCfg), - .domain_rule_t(domain_rule_t), - .midend_ax_t (midend_ax_t), - .midend_aw_t (midend_aw_t), - .midend_ar_t (midend_ar_t), - .midend_id_t (midend_id_t), - .ac_t (snoop_ac_t), - .cr_t (snoop_cr_t), - .slv_bv_t (slv_bv_t), - .slv_idx_t (slv_idx_t), - .tid_t (tid_t), - .nline_t (nline_t) - ) u_ace_ccu_snoop_pipe ( - .clk_i, - .rst_ni, - .st0_aw_i (ccu_shareable_req.aw), - .st0_aw_valid_i (ccu_shareable_req.aw_valid), - .st0_aw_ready_o (ccu_shareable_resp.aw_ready), - .st0_ar_i (ccu_shareable_req.ar), - .st0_ar_valid_i (ccu_shareable_req.ar_valid), - .st0_ar_ready_o (ccu_shareable_resp.ar_ready), - .st0_replay_ar_i (replay_ar), - .st0_replay_ar_valid_i (replay_ar_valid), - .st0_replay_ar_ready_o (replay_ar_ready), - .st0_ac_o (ac), - .st0_ac_bv_o (ac_bv), - .st0_ac_valid_o (ac_valid), - .st0_ac_ready_i (ac_ready), - .st0_replay_full_i (replay_full), - .st0_replay_check_o (replay_check), - .st0_replay_hit_i (replay_hit), - .st0_replay_alloc_o (replay_alloc), - .st0_tracker_full_i (tracker_full), - .st0_tracker_check_o (tracker_check), - .st0_tracker_check_hit_i (tracker_check_hit), - .st0_tracker_alloc_o (tracker_alloc), - 
.st0_tracker_alloc_b_o (tracker_alloc_b), - .st0_tracker_alloc_r_o (tracker_alloc_r), - .st0_tracker_alloc_nline_o(tracker_alloc_nline), - .st0_tracker_alloc_id_o (tracker_alloc_id), - .st0_tracker_alloc_tid_i (tracker_alloc_tid), - .st0_domain_rule_i (domain_rule_i), - .st1_cr_i (cr), - .st1_cr_bv_o (cr_bv), - .st1_cr_valid_i (cr_valid), - .st1_cr_ready_o (cr_ready), - .st1_ax_o (pipe_ax), - .st1_ax_is_write_o (pipe_ax_is_write), - .st1_r_resp_shared_o (pipe_r_resp_shared), - .st1_r_resp_dirty_o (pipe_r_resp_dirty), - .st1_ax_tid_o (pipe_ax_tid), - .st1_cd_ctrl_write_o (pipe_cd_ctrl_write), - .st1_cd_ctrl_read_o (pipe_cd_ctrl_read), - .st1_write_valid_o (write_valid), - .st1_write_ready_i (write_ready), - .st1_read_valid_o (read_valid), - .st1_read_ready_i (read_ready), - .st1_cd_ctrl_valid_o (cd_ctrl_valid), - .st1_cd_ctrl_ready_i (cd_ctrl_ready), - .evt_st0_stall_o ( /*unused*/), - .evt_st1_stall_o ( /*unused*/) - ); - - stream_fork_dynamic #( - .N_OUP(CcuCfg.u.SlvPorts) - ) u_ace_ccu_ac_fork ( - .clk_i, - .rst_ni, - .valid_i (ac_valid), - .ready_o (ac_ready), - .sel_i (ac_bv), - .sel_valid_i(1'b1), - .sel_ready_o(), - .valid_o (snoop_ac_valid), - .ready_i (snoop_ac_ready) - ); - - assign snoop_ac = {CcuCfg.u.SlvPorts{ac}}; - - stream_join_dynamic #( - .N_INP(CcuCfg.u.SlvPorts) - ) u_cr_join ( - .inp_valid_i(snoop_cr_valid), - .inp_ready_o(snoop_cr_ready), - .sel_i (cr_bv), - .oup_valid_o(cr_valid), - .oup_ready_i(cr_ready) - ); - - always_comb begin : cr_merge_comb - cr = '0; - pipe_cd_bv = '0; - for (int unsigned i = 0; i < CcuCfg.u.SlvPorts; i++) begin - if (cr_bv[i]) begin - cr |= snoop_cr[i]; - pipe_cd_bv[i] = snoop_cr[i].DataTransfer; - end - end - end - // }}} - - // Shareable W buffer - // {{{ - stream_fifo #( - .FALL_THROUGH(1'b0), - .DEPTH (CcuCfg.u.ShareableWFifoDepth), - .T (w_t) - ) u_shareable_w_fifo ( - .clk_i, - .rst_ni, - .flush_i (1'b0), - .testmode_i(1'b0), - .usage_o (), - .data_i (ccu_shareable_req.w), - .valid_i (ccu_shareable_req.w_valid), 
- .ready_o (ccu_shareable_resp.w_ready), - .data_o (write_w), - .valid_o (write_w_valid), - .ready_i (write_w_ready) - ); - // }}} - - // Tracker - // {{{ - assign tracker_dealloc_r_resp = ccu_shareable_req.r_ready && ccu_shareable_resp.r_valid && ccu_shareable_resp.r.last; - assign tracker_dealloc_r_resp_id = ccu_shareable_resp.r.id; - assign tracker_dealloc_check_b_resp = axi_shareable_resp.b_valid; - assign tracker_dealloc_b_resp = ccu_shareable_req.b_ready && ccu_shareable_resp.b_valid; - assign tracker_dealloc_b_resp_id = ccu_shareable_resp.b.id; - - ace_ccu_tracker #( - .CcuCfg (CcuCfg), - .slv_bv_t (slv_bv_t), - .slv_idx_t (slv_idx_t), - .nline_t (nline_t), - .midend_id_t(midend_id_t), - .tid_t (tid_t) - ) u_ace_ccu_tracker ( - .clk_i, - .rst_ni, - .full_o (tracker_full), - .empty_o ( /*unused*/), - .check_i (tracker_check), - .check_hit_o (tracker_check_hit), - .alloc_i (tracker_alloc), - .alloc_b_i (tracker_alloc_b), - .alloc_r_i (tracker_alloc_r), - .alloc_nline_i (tracker_alloc_nline), - .alloc_id_i (tracker_alloc_id), - .alloc_tid_o (tracker_alloc_tid), - .dealloc_rack_i (ccu_shareable_req.rack), - .dealloc_wack_i (ccu_shareable_req.wack), - .dealloc_r_resp_i (tracker_dealloc_r_resp), - .dealloc_r_resp_id_i (tracker_dealloc_r_resp_id), - .dealloc_b_resp_i (tracker_dealloc_b_resp), - .dealloc_check_b_resp_i(tracker_dealloc_check_b_resp), - .dealloc_b_resp_id_i (tracker_dealloc_b_resp_id), - .evt_hit_id_o ( /*unused*/), - .evt_hit_nline_o ( /*unused*/) - ); - // }}} - - - // Replay table - // {{{ - if (CcuCfg.u.ReplayEn) begin : gen_replay - // TODO: implement replay table to put AR requests - // with address conflicts on hold - $fatal(-1, "Replay table not yet implemented."); - end else begin : gen_no_replay - assign replay_full = 1'b0; - assign replay_hit = 1'b0; - assign replay_ar = '0; - assign replay_ar_valid = 1'b0; - end - // }}} - - // Write Unit - // {{{ - ace_ccu_write #( - .CcuCfg (CcuCfg), - .midend_ax_t (midend_ax_t), - .tid_t (tid_t), - 
.backend_aw_t(backend_aw_t), - .w_t (w_t), - .midend_b_t (midend_b_t), - .backend_b_t (backend_b_t) - ) u_ace_ccu_write_unit ( - .clk_i, - .rst_ni, - .valid_i (write_valid), - .ready_o (write_ready), - .ax_i (pipe_ax), - .ax_is_write_i (pipe_ax_is_write), - .ax_is_writeback_i(pipe_cd_ctrl_write), - .ax_tid_i (pipe_ax_tid), - .w_i (write_w), - .w_valid_i (write_w_valid), - .w_ready_o (write_w_ready), - .cd_w_i (cd_w), - .cd_w_valid_i (cd_w_valid), - .cd_w_ready_o (cd_w_ready), - .b_o (ccu_shareable_resp.b), - .b_valid_o (ccu_shareable_resp.b_valid), - .b_ready_i (ccu_shareable_req.b_ready), - .aw_o (axi_shareable_req.aw), - .aw_valid_o (axi_shareable_req.aw_valid), - .aw_ready_i (axi_shareable_resp.aw_ready), - .w_o (axi_shareable_req.w), - .w_valid_o (axi_shareable_req.w_valid), - .w_ready_i (axi_shareable_resp.w_ready), - .b_i (axi_shareable_resp.b), - .b_valid_i (axi_shareable_resp.b_valid), - .b_ready_o (axi_shareable_req.b_ready) - ); - // }}} - - // Read Unit - // {{{ - ace_ccu_read #( - .CcuCfg (CcuCfg), - .midend_ax_t (midend_ax_t), - .tid_t (tid_t), - .backend_ar_t(backend_ar_t), - .backend_r_t (backend_r_t), - .midend_r_t (midend_r_t), - .slv_idx_t (slv_idx_t) - ) u_ace_ccu_read_unit ( - .clk_i, - .rst_ni, - .valid_i (read_valid), - .ready_o (read_ready), - .ax_i (pipe_ax), - .cd_r_i (cd_r), - .cd_r_valid_i(cd_r_valid), - .cd_r_ready_o(cd_r_ready), - .r_o (ccu_shareable_resp.r), - .r_valid_o (ccu_shareable_resp.r_valid), - .r_ready_i (ccu_shareable_req.r_ready), - .ar_o (axi_shareable_req.ar), - .ar_valid_o (axi_shareable_req.ar_valid), - .ar_ready_i (axi_shareable_resp.ar_ready), - .r_i (axi_shareable_resp.r), - .r_valid_i (axi_shareable_resp.r_valid), - .r_ready_o (axi_shareable_req.r_ready) - ); - // }}} - - - // CD Ctrl Unit - // {{{ - ace_ccu_cd_ctrl #( - .CcuCfg (CcuCfg), - .midend_ax_t(midend_ax_t), - .midend_id_t(midend_id_t), - .user_t (user_t), - .cd_t (snoop_cd_t), - .slv_bv_t (slv_bv_t), - .w_t (w_t), - .midend_r_t (midend_r_t) - ) 
u_ace_ccu_cd_ctrl ( - .clk_i, - .rst_ni, - .valid_i (cd_ctrl_valid), - .ready_o (cd_ctrl_ready), - .ax_i (pipe_ax), - .cd_ctrl_write_i(pipe_cd_ctrl_write), - .cd_ctrl_read_i (pipe_cd_ctrl_read), - .cd_bv_i (pipe_cd_bv), - .r_resp_shared_i(pipe_r_resp_shared), - .r_resp_dirty_i (pipe_r_resp_dirty), - .cd_i (snoop_cd), - .cd_valid_i (snoop_cd_valid), - .cd_ready_o (snoop_cd_ready), - .w_o (cd_w), - .w_valid_o (cd_w_valid), - .w_ready_i (cd_w_ready), - .r_o (cd_r), - .r_valid_o (cd_r_valid), - .r_ready_i (cd_r_ready) - ); - // }}} - - // Mst mux - // {{{ - `ACE_TO_AXI_ASSIGN_REQ(axi_nonshareable_req, ccu_nonshareable_req) - `AXI_TO_ACE_ASSIGN_RESP(ccu_nonshareable_resp, axi_nonshareable_resp) - - axi_mux #( - .SlvAxiIDWidth(CcuCfg.AxiBackendIdWidth), - .slv_aw_chan_t(backend_aw_t), - .mst_aw_chan_t(mst_aw_t), - .w_chan_t (w_t), - .slv_b_chan_t (backend_b_t), - .mst_b_chan_t (mst_b_t), - .slv_ar_chan_t(backend_ar_t), - .mst_ar_chan_t(mst_ar_t), - .slv_r_chan_t (backend_r_t), - .mst_r_chan_t (mst_r_t), - .slv_req_t (backend_req_t), - .slv_resp_t (backend_resp_t), - .mst_req_t (mst_req_t), - .mst_resp_t (mst_resp_t), - .NoSlvPorts (2), - .MaxWTrans (32'd8), - .FallThrough (1'b1), - .SpillAw (1'b0), - .SpillW (1'b0), - .SpillB (1'b0), - .SpillAr (1'b0), - .SpillR (1'b0) - ) u_axi_mst_mux ( - .clk_i, - .rst_ni, - .test_i (1'b0), - .slv_reqs_i ({axi_nonshareable_req, axi_shareable_req}), - .slv_resps_o({axi_nonshareable_resp, axi_shareable_resp}), - .mst_req_o (mst_req), - .mst_resp_i (mst_resp) - ); - // }}} - - // ACE/AXI cuts - // {{{ - for (genvar i = 0; i < CcuCfg.u.SlvPorts; i++) begin : gen_snoop_cut - - snoop_req_t snoop_req; - snoop_resp_t snoop_resp; - - assign snoop_req.ac_valid = snoop_ac_valid[i]; - assign snoop_ac_ready[i] = snoop_resp.ac_ready; - assign snoop_req.ac = snoop_ac[i]; - - assign snoop_cr_valid[i] = snoop_resp.cr_valid; - assign snoop_req.cr_ready = snoop_cr_ready[i]; - assign snoop_cr[i] = snoop_resp.cr_resp; - - assign snoop_cd_valid[i] = 
snoop_resp.cd_valid; - assign snoop_req.cd_ready = snoop_cd_ready[i]; - assign snoop_cd[i] = snoop_resp.cd; - - ace_snoop_cut #( - .BypassAc (!CcuCfg.u.CutSnoopReq), - .BypassCr (!CcuCfg.u.CutSnoopResp), - .BypassCd (!CcuCfg.u.CutSnoopResp), - .ac_chan_t (snoop_ac_t), - .cd_chan_t (snoop_cd_t), - .cr_chan_t (snoop_cr_t), - .snoop_req_t (snoop_req_t), - .snoop_resp_t(snoop_resp_t) - ) u_snoop_cut ( - .clk_i, - .rst_ni, - .slv_req_i (snoop_req), - .slv_resp_o(snoop_resp), - .mst_req_o (snoop_req_o[i]), - .mst_resp_i(snoop_resp_i[i]) - ); - end - - axi_cut #( - .BypassAw (!CcuCfg.u.CutMstReq), - .BypassW (!CcuCfg.u.CutMstReq), - .BypassB (!CcuCfg.u.CutMstResp), - .BypassAr (!CcuCfg.u.CutMstReq), - .BypassR (!CcuCfg.u.CutMstResp), - .aw_chan_t (mst_aw_t), - .w_chan_t (w_t), - .b_chan_t (mst_b_t), - .ar_chan_t (mst_ar_t), - .r_chan_t (mst_r_t), - .axi_req_t (mst_req_t), - .axi_resp_t(mst_resp_t) - ) u_mst_cut ( - .clk_i, - .rst_ni, - .slv_req_i (mst_req), - .slv_resp_o(mst_resp), - .mst_req_o (mst_req_o), - .mst_resp_i(mst_resp_i) - ); - // }}} - -endmodule - -module ace_ccu_top_intf - import ace_pkg::*; - import ace_ccu_pkg::*; -#( - parameter ace_ccu_cfg_t CCU_CFG = '{default: '0}, - localparam type domain_bv_t = `DOMAIN_BV_T(CCU_CFG.u.SlvPorts), - localparam type domain_rule_t = `DOMAIN_RULE_T(domain_bv_t) -) ( - input logic clk_i, - input logic rst_ni, - input domain_rule_t [CCU_CFG.u.SlvPorts-1:0] domain_rule_i, - ACE_BUS.Slave slv [CCU_CFG.u.SlvPorts], - SNOOP_BUS.Slave snoop [CCU_CFG.u.SlvPorts], - AXI_BUS.Master mst -); - - typedef logic [CCU_CFG.u.AxiSlvIdWidth-1:0] slv_id_t; - typedef logic [CCU_CFG.AxiMstIdWidth-1:0] mst_id_t; - typedef logic [CCU_CFG.u.AxiAddrWidth-1:0] addr_t; - typedef logic [CCU_CFG.u.AxiDataWidth-1:0] data_t; - typedef logic [CCU_CFG.u.AxiDataWidth/8-1:0] strb_t; - typedef logic [CCU_CFG.u.AxiUserWidth-1:0] user_t; - - `ACE_TYPEDEF_AW_CHAN_T(slv_aw_t, addr_t, slv_id_t, user_t) - `AXI_TYPEDEF_W_CHAN_T(w_t, data_t, strb_t, user_t) - 
`AXI_TYPEDEF_B_CHAN_T(slv_b_t, slv_id_t, user_t) - `ACE_TYPEDEF_AR_CHAN_T(slv_ar_t, addr_t, slv_id_t, user_t) - `ACE_TYPEDEF_R_CHAN_T(slv_r_t, data_t, slv_id_t, user_t) - `ACE_TYPEDEF_REQ_T(slv_req_t, slv_aw_t, w_t, slv_ar_t) - `ACE_TYPEDEF_RESP_T(slv_resp_t, slv_b_t, slv_r_t) - - `AXI_TYPEDEF_AW_CHAN_T(mst_aw_t, addr_t, mst_id_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(mst_b_t, mst_id_t, user_t) - `AXI_TYPEDEF_AR_CHAN_T(mst_ar_t, addr_t, mst_id_t, user_t) - `AXI_TYPEDEF_R_CHAN_T(mst_r_t, data_t, mst_id_t, user_t) - `AXI_TYPEDEF_REQ_T(mst_req_t, mst_aw_t, w_t, mst_ar_t) - `AXI_TYPEDEF_RESP_T(mst_resp_t, mst_b_t, mst_r_t) - - `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) - `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) - `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) - `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) - `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) - - slv_req_t [CCU_CFG.u.SlvPorts-1:0] slv_req; - slv_resp_t [CCU_CFG.u.SlvPorts-1:0] slv_resp; - - mst_req_t mst_req; - mst_resp_t mst_resp; - - snoop_req_t [CCU_CFG.u.SlvPorts-1:0] snoop_req; - snoop_resp_t [CCU_CFG.u.SlvPorts-1:0] snoop_resp; - - for (genvar i = 0; i < CCU_CFG.u.SlvPorts; i++) begin : gen_bus_assignments - `ACE_ASSIGN_TO_REQ(slv_req[i], slv[i]) - `ACE_ASSIGN_FROM_RESP(slv[i], slv_resp[i]) - `SNOOP_ASSIGN_FROM_REQ(snoop[i], snoop_req[i]) - `SNOOP_ASSIGN_TO_RESP(snoop_resp[i], snoop[i]) - end - - `AXI_ASSIGN_FROM_REQ(mst, mst_req) - `AXI_ASSIGN_TO_RESP(mst_resp, mst) - - ace_ccu_top #( - .CcuCfg (CCU_CFG), - .domain_rule_t(domain_rule_t), - .slv_ar_t (slv_ar_t), - .slv_aw_t (slv_aw_t), - .w_t (w_t), - .slv_b_t (slv_b_t), - .slv_r_t (slv_r_t), - .slv_req_t (slv_req_t), - .slv_resp_t (slv_resp_t), - .mst_ar_t (mst_ar_t), - .mst_aw_t (mst_aw_t), - .mst_b_t (mst_b_t), - .mst_r_t (mst_r_t), - .mst_req_t (mst_req_t), - .mst_resp_t (mst_resp_t), - .snoop_ac_t (snoop_ac_t), - .snoop_cr_t (snoop_cr_t), - .snoop_cd_t (snoop_cd_t), - .snoop_req_t (snoop_req_t), - .snoop_resp_t (snoop_resp_t) - ) u_ace_ccu ( - 
.clk_i, - .rst_ni, - .slv_req_i (slv_req), - .slv_resp_o (slv_resp), - .domain_rule_i(domain_rule_i), - .snoop_req_o (snoop_req), - .snoop_resp_i (snoop_resp), - .mst_req_o (mst_req), - .mst_resp_i (mst_resp) - ); - -endmodule diff --git a/src/ccu/ace_ccu_tracker.sv b/src/ccu/ace_ccu_tracker.sv deleted file mode 100644 index 468b8fe..0000000 --- a/src/ccu/ace_ccu_tracker.sv +++ /dev/null @@ -1,223 +0,0 @@ -// Copyright (c) 2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -module ace_ccu_tracker - import ace_pkg::*; - import ace_ccu_pkg::*; -#( - parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type slv_bv_t = logic, - parameter type slv_idx_t = logic, - parameter type nline_t = logic, - parameter type midend_id_t = logic, - parameter type tid_t = logic -) ( - input logic clk_i, - input logic rst_ni, - - output logic full_o, - output logic empty_o, - - // Check/alloc interface - // {{{ - input logic check_i, - output logic check_hit_o, - input logic alloc_i, - input logic alloc_b_i, - input logic alloc_r_i, - input nline_t alloc_nline_i, - input midend_id_t alloc_id_i, - output tid_t alloc_tid_o, - // }}} - - // Lookup/dealloc interface - // {{{ - input logic dealloc_rack_i, - input logic dealloc_wack_i, - input logic dealloc_r_resp_i, - input midend_id_t dealloc_r_resp_id_i, - input logic dealloc_b_resp_i, - input logic dealloc_check_b_resp_i, - input midend_id_t dealloc_b_resp_id_i, - // }}} - - // Performance events - // {{{ - output logic evt_hit_id_o, - output logic evt_hit_nline_o - // }}} -); - - // Typedefs - // {{{ - typedef struct packed { - logic r; - logic b; - } meta_t; - - typedef struct packed { - nline_t nline; - midend_id_t id; - } data_t; - // }}} - - // Internal signals - // {{{ - logic [CcuCfg.u.MaxTransactions-1:0] valid_q; - logic [CcuCfg.u.MaxTransactions-1:0] valid_d; - logic [CcuCfg.u.MaxTransactions-1:0] valid_set; - logic [CcuCfg.u.MaxTransactions-1:0] valid_clr; - - meta_t [CcuCfg.u.MaxTransactions-1:0] meta_q; - meta_t [CcuCfg.u.MaxTransactions-1:0] meta_d; - meta_t [CcuCfg.u.MaxTransactions-1:0] meta_set; - meta_t [CcuCfg.u.MaxTransactions-1:0] meta_clr; - data_t [CcuCfg.u.MaxTransactions-1:0] data_q; - - logic [CcuCfg.u.MaxTransactions-1:0] hit_id_bv; - logic [CcuCfg.u.MaxTransactions-1:0] hit_nline_bv; - logic hit_id; - logic hit_nline; - - tid_t rack_queue_wdata; - tid_t wack_queue_wdata; - tid_t rack_queue_rdata; - tid_t wack_queue_rdata; - // }}} - - // Alloc logic - // {{{ - for 
(genvar i = 0; i < CcuCfg.u.MaxTransactions; i++) begin : gen_alloc - assign valid_set[i] = alloc_i && (i == alloc_tid_o); - assign meta_set[i].r = valid_set[i] && alloc_r_i; - assign meta_set[i].b = valid_set[i] && alloc_b_i; - end - - always_comb begin : alloc_tid_comb - alloc_tid_o = '0; - for (int unsigned i = 0; i < CcuCfg.u.MaxTransactions; i++) begin - if (!valid_q[i]) begin - alloc_tid_o = CcuCfg.TransactionIdxWidth'(i); - break; - end - end - end - // }}} - - // Dealloc logic - // {{{ - for (genvar i = 0; i < CcuCfg.u.MaxTransactions; i++) begin : gen_dealloc - assign meta_clr[i].r = dealloc_rack_i && (i == rack_queue_rdata); - assign meta_clr[i].b = dealloc_wack_i && (i == wack_queue_rdata); - assign valid_clr[i] = ~|meta_d[i]; - end - - always_comb begin : xack_queue_wdata_mux - rack_queue_wdata = '0; - wack_queue_wdata = '0; - - for (int unsigned i = 0; i < CcuCfg.u.MaxTransactions; i++) begin - if (dealloc_b_resp_id_i == data_q[i].id && valid_q[i]) - wack_queue_wdata = CcuCfg.TransactionIdxWidth'(i); - if (dealloc_r_resp_id_i == data_q[i].id && valid_q[i]) - rack_queue_wdata = CcuCfg.TransactionIdxWidth'(i); - end - end - - // Push an entry ID to the wack/rack queues once a response handshake happens - fifo_v3 #( - .FALL_THROUGH(1'b0), - .DEPTH (CcuCfg.u.MaxTransactions), - .dtype (tid_t) - ) u_tracker_wack_queue ( - .clk_i, - .rst_ni, - .flush_i (1'b0), - .testmode_i(1'b0), - .full_o (), - .empty_o (), - .usage_o (), - .data_i (wack_queue_wdata), - .push_i (dealloc_b_resp_i), - .data_o (wack_queue_rdata), - .pop_i (dealloc_wack_i) - ); - - fifo_v3 #( - .FALL_THROUGH(1'b0), - .DEPTH (CcuCfg.u.MaxTransactions), - .dtype (tid_t) - ) u_tracker_rack_queue ( - .clk_i, - .rst_ni, - .flush_i (1'b0), - .testmode_i(1'b0), - .full_o (), - .empty_o (), - .usage_o (), - .data_i (rack_queue_wdata), - .push_i (dealloc_r_resp_i), - .data_o (rack_queue_rdata), - .pop_i (dealloc_rack_i) - ); - // }}} - - // State holding elements - // {{{ - for (genvar i = 0; i < 
CcuCfg.u.MaxTransactions; i++) begin : gen_ffs - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - data_q[i] <= '0; - end else if (valid_set[i]) begin - data_q[i] <= '{nline: alloc_nline_i, id: alloc_id_i}; - end - end - - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - meta_q[i] <= '0; - valid_q[i] <= 1'b0; - end else begin - meta_q[i] <= meta_d[i]; - valid_q[i] <= valid_d[i]; - end - end - - assign meta_d[i] = (meta_set[i] & ~meta_q[i]) | (~meta_clr[i] & meta_q[i]); - assign valid_d[i] = (valid_set[i] & ~valid_q[i]) | (~valid_clr[i] & valid_q[i]); - end - // }}} - - // Check logic - // {{{ - for (genvar i = 0; i < CcuCfg.u.MaxTransactions; i++) begin : gen_check - assign hit_id_bv[i] = valid_q[i] && (data_q[i].id == alloc_id_i); - assign hit_nline_bv[i] = valid_q[i] && (data_q[i].nline == alloc_nline_i); - end - - assign hit_id = |hit_id_bv; - assign hit_nline = |hit_nline_bv; - assign check_hit_o = check_i && (hit_id || hit_nline); - // }}} - - // Global control - // {{{ - assign full_o = (valid_q == '1); - assign empty_o = (valid_q == '0); - // }}} - - // Performance events - // {{{ - assign evt_hit_id_o = check_i && hit_id; - assign evt_hit_nline_o = check_i && hit_nline; - // }}} - -endmodule diff --git a/src/ccu/ace_ccu_write.sv b/src/ccu/ace_ccu_write.sv deleted file mode 100644 index bffc709..0000000 --- a/src/ccu/ace_ccu_write.sv +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright (c) 2025 ETH Zurich, University of Bologna -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -`include "axi/assign.svh" - -module ace_ccu_write - import ace_pkg::*; - import ace_ccu_pkg::*; -#( - parameter ace_ccu_cfg_t CcuCfg = '{default: '0}, - parameter type midend_ax_t = logic, - parameter type tid_t = logic, - parameter type backend_aw_t = logic, - parameter type w_t = logic, - parameter type midend_b_t = logic, - parameter type backend_b_t = logic -) ( - input logic clk_i, - input logic rst_ni, - - // Ctrl - input logic valid_i, - output logic ready_o, - input midend_ax_t ax_i, - input logic ax_is_write_i, - input logic ax_is_writeback_i, - input tid_t ax_tid_i, - // Slv interface - input w_t w_i, - input logic w_valid_i, - output logic w_ready_o, - input w_t cd_w_i, - input logic cd_w_valid_i, - output logic cd_w_ready_o, - output midend_b_t b_o, - output logic b_valid_o, - input logic b_ready_i, - // Mst interface - output backend_aw_t aw_o, - output logic aw_valid_o, - input logic aw_ready_i, - output w_t w_o, - output logic w_valid_o, - input logic w_ready_i, - input backend_b_t b_i, - input logic b_valid_i, - output logic b_ready_o -); - // Typedefs - // {{{ - typedef struct packed { - midend_ax_t ax; - logic ax_is_write; - logic ax_is_writeback; - tid_t ax_tid; - } aw_sync_reg_t; - - typedef enum { - AW_FSM_IDLE, - AW_FSM_WAIT_B_RESP, - AW_FSM_PASSTHROUGH - } aw_fsm_e; - // }}} - - // Internal signals - // {{{ - aw_sync_reg_t aw_sync_wdata; - aw_sync_reg_t aw_sync_rdata; - logic aw_sync_valid; - logic aw_sync_ready; - logic aw_fsm_valid; - logic aw_fsm_ready; - logic aw_is_writeback; - aw_fsm_e aw_fsm_d; - aw_fsm_e aw_fsm_q; - - logic w_ctrl_fifo_valid_in; - logic w_ctrl_fifo_ready_in; - logic 
w_ctrl_fifo_valid_out; - logic w_ctrl_fifo_ready_out; - logic w_mux_valid_out; - logic w_mux_ready_out; - logic w_is_write_back; - logic b_is_write_back; - // }}} - - // AW channel - // {{{ - - // Decouple AW handling from snoop pipe - assign aw_sync_wdata = '{ - ax: ax_i, - ax_is_write: ax_is_write_i, - ax_is_writeback: ax_is_writeback_i, - ax_tid: ax_tid_i - }; - - fall_through_register #( - .T(aw_sync_reg_t) - ) u_aw_sync_reg ( - .clk_i, - .rst_ni, - .clr_i (1'b0), - .testmode_i(1'b0), - .valid_i (valid_i), - .ready_o (ready_o), - .data_i (aw_sync_wdata), - .valid_o (aw_sync_valid), - .ready_i (aw_sync_ready), - .data_o (aw_sync_rdata) - ); - - always_comb begin : aw_writeback_fsm_comb - aw_fsm_d = aw_fsm_q; - - aw_is_writeback = 1'b0; - aw_fsm_valid = aw_sync_valid; - aw_sync_ready = aw_fsm_ready; - - case (aw_fsm_q) - AW_FSM_IDLE: begin - if (aw_sync_rdata.ax_is_writeback) begin - // A writeback is pending - aw_is_writeback = 1'b1; - if (aw_fsm_valid && aw_fsm_ready) begin - // The writeback request is done - if (aw_sync_rdata.ax_is_write) begin - // A write is also pending - aw_fsm_d = AW_FSM_WAIT_B_RESP; - aw_sync_ready = 1'b0; - end - end - end - end - AW_FSM_WAIT_B_RESP: begin - aw_fsm_valid = 1'b0; - aw_sync_ready = 1'b0; - - if (b_valid_i && b_ready_o && {1'b1, aw_sync_rdata.ax.id} == b_i.id) begin - // The writeback response is received - // The pending write can be sent - aw_fsm_d = AW_FSM_PASSTHROUGH; - end - end - AW_FSM_PASSTHROUGH: begin - // Let the handshake complete - if (aw_fsm_valid && aw_fsm_ready) begin - aw_fsm_d = AW_FSM_IDLE; - end - end - endcase - end - - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - aw_fsm_q <= AW_FSM_IDLE; - end else begin - aw_fsm_q <= aw_fsm_d; - end - end - - always_comb begin : aw_mux_comb - aw_o = '0; - - `AXI_SET_AW_STRUCT(aw_o, aw_sync_rdata.ax) - - if (aw_is_writeback) begin - // Use the MSB ID bit to indicate a writeback - aw_o.id[CcuCfg.AxiBackendIdWidth-1] = 1'b1; - // Pass a 
full cacheline - aw_o.addr = axi_pkg::aligned_addr(aw_sync_rdata.ax.addr, CcuCfg.CachelineBytesIdxWidth); - aw_o.len = CcuCfg.CachelineAxiTransfers - 1; - aw_o.size = CcuCfg.AxiDataBytesIdxWidth; - // Burst type for write backs - aw_o.burst = axi_pkg::BURST_WRAP; - // The write back is not atomic - aw_o.lock = 1'b0; - aw_o.atop = '0; - end - end - - stream_fork #( - .N_OUP(2) - ) u_aw_fork ( - .clk_i, - .rst_ni, - .valid_i(aw_fsm_valid), - .ready_o(aw_fsm_ready), - .valid_o({aw_valid_o, w_ctrl_fifo_valid_in}), - .ready_i({aw_ready_i, w_ctrl_fifo_ready_in}) - ); - // }}} - - - // W channel - // {{{ - stream_fifo #( - .FALL_THROUGH(1'b1), - .DATA_WIDTH (1), - .DEPTH (2) - ) u_w_ctrl_fifo ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i (1'b0), - .testmode_i(1'b0), - .usage_o (), - .data_i (aw_is_writeback), - .valid_i (w_ctrl_fifo_valid_in), - .ready_o (w_ctrl_fifo_ready_in), - .data_o (w_is_write_back), - .valid_o (w_ctrl_fifo_valid_out), - .ready_i (w_ctrl_fifo_ready_out && w_o.last) - ); - - stream_mux #( - .DATA_T(w_t), - .N_INP (2) - ) u_w_mux ( - .inp_data_i ({cd_w_i, w_i}), - .inp_valid_i({cd_w_valid_i, w_valid_i}), - .inp_ready_o({cd_w_ready_o, w_ready_o}), - .inp_sel_i (w_is_write_back), - .oup_data_o (w_o), - .oup_valid_o(w_mux_valid_out), - .oup_ready_i(w_mux_ready_out) - ); - - stream_join #( - .N_INP(2) - ) u_w_join ( - .inp_valid_i({w_ctrl_fifo_valid_out, w_mux_valid_out}), - .inp_ready_o({w_ctrl_fifo_ready_out, w_mux_ready_out}), - .oup_valid_o(w_valid_o), - .oup_ready_i(w_ready_i) - ); - // }}} - - // B channel - // {{{ - stream_filter u_b_filter ( - .valid_i(b_valid_i), - .ready_o(b_ready_o), - .drop_i (b_is_write_back), - .valid_o(b_valid_o), - .ready_i(b_ready_i) - ); - - assign b_is_write_back = b_i.id[CcuCfg.AxiBackendIdWidth-1]; - - `AXI_ASSIGN_B_STRUCT(b_o, b_i) - // }}} - -endmodule diff --git a/src/ccu/ccu_exclusive_monitor.sv b/src/ccu/ccu_exclusive_monitor.sv new file mode 100644 index 0000000..f5b1dc7 --- /dev/null +++ 
b/src/ccu/ccu_exclusive_monitor.sv
@@ -0,0 +1,229 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+`include "axi/assign.svh"
+`include "ace/assign.svh"
+
+module ccu_exclusive_monitor // Keeps one exclusive-access reservation per subordinate port on the AR/R path
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter ccu_config_t ccuCfg = '{default: '0},
+
+  parameter type ccu_ace_ar_t = logic,
+  parameter type ccu_ace_r_t = logic
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  input logic [ccuCfg.u.numSubordinates-1:0] dealloc_i, // per port: clears that port's lock_q
+  output logic [ccuCfg.u.numSubordinates-1:0] sc_fail_o, // per port: index output of the R arbiter (see u_r_arbiter)
+
+  output logic [ccuCfg.u.numSubordinates-1:0] r_id_hit_o, // per port: r_o ID equals the stored reservation ID
+
+  input ccu_ace_ar_t [ccuCfg.u.numSubordinates-1:0] ar_i, // requester-side AR
+  input logic [ccuCfg.u.numSubordinates-1:0] ar_valid_i,
+  output logic [ccuCfg.u.numSubordinates-1:0] ar_ready_o,
+  output ccu_ace_r_t [ccuCfg.u.numSubordinates-1:0] r_o, // requester-side R (arbitrated)
+  output logic [ccuCfg.u.numSubordinates-1:0] r_valid_o,
+  input logic [ccuCfg.u.numSubordinates-1:0] r_ready_i,
+
+  output ccu_ace_ar_t [ccuCfg.u.numSubordinates-1:0] ar_o, // forwarded AR (copy of ar_i)
+  output logic [ccuCfg.u.numSubordinates-1:0] ar_valid_o,
+  input logic [ccuCfg.u.numSubordinates-1:0] ar_ready_i,
+  input ccu_ace_r_t [ccuCfg.u.numSubordinates-1:0] r_i, // R returning for forwarded requests
+  input logic [ccuCfg.u.numSubordinates-1:0] r_valid_i,
+  output logic [ccuCfg.u.numSubordinates-1:0] r_ready_o
+);
+
+typedef struct packed { // reservation entry: only the AR ID is stored
+  logic [ccuCfg.u.axiSubordinateIdWidth-1:0] id;
+} exclusive_monitor_entry_t;
+
+typedef struct packed { // fields needed to build the locally generated R response
+  logic [ccuCfg.u.axiSubordinateIdWidth-1:0] id;
+  logic [ccuCfg.u.axiUserWidth-1:0] user;
+} r_register_entry_t;
+
+exclusive_monitor_entry_t [ccuCfg.u.numSubordinates-1:0] entry_q;
+exclusive_monitor_entry_t [ccuCfg.u.numSubordinates-1:0] entry_d;
+logic [ccuCfg.u.numSubordinates-1:0] valid_q; // reservation valid, one bit per port
+logic [ccuCfg.u.numSubordinates-1:0] valid_d;
+logic [ccuCfg.u.numSubordinates-1:0] lock_q; // set by a passing exclusive store, cleared by dealloc_i
+logic [ccuCfg.u.numSubordinates-1:0] lock_d;
+
+logic [ccuCfg.u.numSubordinates-1:0] exclusive_store_pass; // shared vector: every port observes the others' passes
+
+for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_entry
+  logic is_exclusive_sequence;
+  logic is_exclusive_load;
+  logic is_exclusive_store;
+  logic reservation_set;
+  logic reservation_reset;
+  logic exclusive_store_will_fail;
+
+  r_register_entry_t r_register_wdata;
+  r_register_entry_t r_register_rdata;
+  logic r_register_valid;
+  logic r_register_ready;
+  logic r_ack_valid;
+  logic r_ack_ready;
+  ccu_ace_r_t r_ack;
+
+  logic ar_valid;
+  logic ar_ready;
+
+  assign r_id_hit_o[s] = entry_q[s].id == r_o[s].id; // plain ID compare, not qualified by valid_q[s]
+
+  assign is_exclusive_load = ar_i[s].lock && ( // locked ReadClean or locked ReadShared
+    ace_is_read_clean (
+      ar_i[s].bar,
+      ar_i[s].domain,
+      ar_i[s].snoop
+    ) ||
+    ace_is_read_shared(
+      ar_i[s].bar,
+      ar_i[s].domain,
+      ar_i[s].snoop
+    ));
+
+  assign is_exclusive_store = ar_i[s].lock && // locked CleanUnique
+    ace_is_clean_unique(
+      ar_i[s].bar,
+      ar_i[s].domain,
+      ar_i[s].snoop
+    );
+
+  assign is_exclusive_sequence = is_exclusive_store || is_exclusive_load;
+
+  assign reservation_set = ar_valid_i[s] && ar_ready_o[s] && is_exclusive_sequence; // accepted exclusive AR on this port
+  assign reservation_reset = |exclusive_store_pass && !exclusive_store_pass[s]; // some other port's exclusive store passed
+  assign exclusive_store_pass[s] = ar_valid_i[s] && ar_ready_o[s] && is_exclusive_store && valid_q[s]; // accepted while reservation held
+  assign exclusive_store_will_fail = !valid_q[s] && is_exclusive_store; // no reservation: answered locally, not forwarded
+
+  always_comb begin // next state; branch priority: dealloc > own store pass > reset by other port > set
+    entry_d[s] = entry_q[s];
+    valid_d[s] = valid_q[s];
+    lock_d [s] = lock_q [s];
+
+    if (dealloc_i[s]) begin
+      lock_d[s] = 1'b0;
+    end else if (exclusive_store_pass[s]) begin
+      lock_d[s] = 1'b1; // valid_q[s] is left unchanged in this branch
+    end else if (reservation_reset) begin
+      valid_d[s] = 1'b0;
+    end else if (reservation_set) begin
+      valid_d[s] = 1'b1;
+      entry_d[s].id = ar_i[s].id;
+    end
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      entry_q[s] <= '0;
+      valid_q[s] <= 1'b0;
+      lock_q [s] <= 1'b0;
+    end else begin
+      entry_q[s] <= entry_d[s];
+      valid_q[s] <= valid_d[s];
+      lock_q [s] <= lock_d [s];
+    end
+  end
+
+  assign ar_valid = ar_valid_i[s] && // stall exclusive ARs while a different port holds the lock
+    !(is_exclusive_sequence && |lock_q && !lock_q[s]);
+  assign ar_ready_o[s] = ar_ready && // same stall condition applied to the ready path
+    !(is_exclusive_sequence && |lock_q && !lock_q[s]);
+
+  stream_demux #(
+    .N_OUP (2)
+  ) u_ar_demux (
+    .inp_valid_i (ar_valid),
+    .inp_ready_o (ar_ready),
+    .oup_sel_i (exclusive_store_will_fail), // 1: into u_r_register (local response), 0: forwarded on ar_o
+    .oup_valid_o ({r_register_valid, ar_valid_o[s]}),
+    .oup_ready_i ({r_register_ready, ar_ready_i[s]})
+  );
+
+  `ACE_ASSIGN_AR_STRUCT(ar_o[s], ar_i[s])
+
+  assign r_register_wdata = '{
+    id: ar_i[s].id,
+    user: ar_i[s].user
+  };
+
+  stream_register #( // holds ID/user of a failing exclusive store until its R response is sent
+    .T (r_register_entry_t)
+  ) u_r_register (
+    .clk_i,
+    .rst_ni,
+    .clr_i (1'b0),
+    .testmode_i (1'b0),
+    .valid_i (r_register_valid),
+    .ready_o (r_register_ready),
+    .data_i (r_register_wdata),
+    .valid_o (r_ack_valid),
+    .ready_i (r_ack_ready),
+    .data_o (r_register_rdata)
+  );
+
+  assign r_ack = '{ // single-beat response built locally for a failing exclusive store
+    id: r_register_rdata.id,
+    data: r_i[s].data, // Don't care
+    resp: {2'b00, axi_pkg::RESP_OKAY},
+    last: 1'b1,
+    user: r_register_rdata.user
+  };
+
+  logic [1:0] r_arbiter_valid;
+  logic [1:0] r_arbiter_ready;
+  logic [1:0] mask_d;
+  logic [1:0] mask_q; // a set bit hides that arbiter input (bit 1: r_ack, bit 0: r_i)
+
+  assign r_arbiter_valid = {r_ack_valid, r_valid_i[s]} & ~mask_q;
+  assign {r_ack_ready, r_ready_o[s]} = r_arbiter_ready & ~mask_q;
+
+  always_comb begin : mask_comb // keeps a multi-beat R burst on the input that started it
+    mask_d = mask_q;
+    if (r_valid_o[s] && r_ready_i[s] && r_o[s].last)
+      mask_d = '0; // last beat transferred: expose both inputs again
+    else if (r_valid_o[s] && r_ready_i[s])
+      mask_d = ~(r_arbiter_valid & r_arbiter_ready); // non-last beat: hide every input that did not just transfer
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) mask_q <= '0;
+    else mask_q <= mask_d;
+  end
+
+  rr_arb_tree #( // merges the local r_ack stream with the returning r_i stream
+    .NumIn (2),
+    .DataType (ccu_ace_r_t),
+    .ExtPrio (1'b0),
+    .AxiVldRdy (1'b1),
+    .LockIn (1'b1),
+    .FairArb (1'b1)
+  ) u_r_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .rr_i ('0),
+    .req_i (r_arbiter_valid),
+    .gnt_o (r_arbiter_ready),
+    .data_i ({r_ack, r_i[s]}),
+    .req_o (r_valid_o[s]),
+    .gnt_i (r_ready_i[s]),
+    .data_o (r_o[s]),
+    .idx_o (sc_fail_o[s]) // presumably 1 when r_ack (upper data_i element) is selected -- confirm against rr_arb_tree
+  );
+end
+
+endmodule
diff --git a/src/ccu/ccu_frontend.sv b/src/ccu/ccu_frontend.sv
new file mode 100644
index 0000000..757bd02
--- /dev/null
+++ b/src/ccu/ccu_frontend.sv
@@ -0,0 +1,268 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+`include "ace/assign.svh"
+
+module ccu_frontend // Per-port exclusive monitoring and RACK tracking in front of a single axi_mux
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter ccu_config_t ccuCfg = '{default: '0},
+
+  parameter type ccu_ace_manager_ar_t = logic,
+  parameter type ccu_ace_manager_aw_t = logic,
+  parameter type ccu_w_t = logic,
+  parameter type ccu_ace_manager_r_t = logic,
+  parameter type ccu_ace_manager_b_t = logic,
+  parameter type ccu_ace_manager_req_t = logic,
+  parameter type ccu_ace_manager_resp_t = logic,
+
+  parameter type ccu_ace_subordinate_ar_t = logic,
+  parameter type ccu_ace_subordinate_aw_t = logic,
+  parameter type ccu_ace_subordinate_r_t = logic,
+  parameter type ccu_ace_subordinate_b_t = logic,
+  parameter type ccu_ace_subordinate_req_t = logic,
+  parameter type ccu_ace_subordinate_resp_t = logic,
+
+  localparam int unsigned scoreboardEntryIndexWidth = ccuCfg.transactionIndexWidth
+
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  input logic shareable_stall_i, // while high, shareable ARs are not offered to the mux
+
+  input ccu_ace_subordinate_req_t [ccuCfg.u.numSubordinates-1:0] subordinate_req_i,
+  output ccu_ace_subordinate_resp_t [ccuCfg.u.numSubordinates-1:0] subordinate_resp_o,
+  input logic [ccuCfg.u.numSubordinates-1:0] subordinate_rack_i, // pops the per-port RACK FIFO
+  input logic [ccuCfg.u.numSubordinates-1:0] subordinate_wack_i, // NOTE(review): not referenced anywhere in this module
+
+  output ccu_ace_manager_req_t manager_req_o,
+  input ccu_ace_manager_resp_t manager_resp_i,
+
+  output logic scoreboard_dealloc_check_o, // last R beat handshake on the manager side
+  output logic [ccuCfg.axiCcuIdWidth-1:0] scoreboard_dealloc_id_o, // ID of that R beat
+  input logic scoreboard_dealloc_hit_i,
+  input logic [scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_entry_i,
+  output logic [ccuCfg.u.numSubordinates-1:0] scoreboard_dealloc_o, // per port: RACK seen for an entry flagged dealloc
+  output logic [ccuCfg.u.numSubordinates-1:0][scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_entry_o
+);
+
+  typedef struct packed { // metadata carried from the last R beat to the matching RACK
+    logic [ccuCfg.transactionIndexWidth-1:0] tid;
+    logic dealloc;
+    logic exclusive;
+  } rack_fifo_entry_t;
+
+  ccu_ace_subordinate_req_t [ccuCfg.u.numSubordinates-1:0] subordinate_req; // mux-side request bundle
+  ccu_ace_subordinate_resp_t [ccuCfg.u.numSubordinates-1:0] subordinate_resp; // mux-side response bundle
+
+  ccu_ace_subordinate_ar_t [ccuCfg.u.numSubordinates-1:0] subordinate_ar;
+  logic [ccuCfg.u.numSubordinates-1:0] subordinate_ar_valid;
+  logic [ccuCfg.u.numSubordinates-1:0] subordinate_ar_ready;
+  ccu_ace_subordinate_r_t [ccuCfg.u.numSubordinates-1:0] subordinate_r;
+  logic [ccuCfg.u.numSubordinates-1:0] subordinate_r_valid;
+  logic [ccuCfg.u.numSubordinates-1:0] subordinate_r_ready;
+
+  ccu_ace_subordinate_ar_t [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_ar;
+  logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_ar_valid;
+  logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_ar_ready;
+  ccu_ace_subordinate_r_t [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_r;
+  logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_r_valid;
+  logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_r_ready;
+
+
+  logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_id_hit;
+  logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_dealloc;
+  logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_sc_fail;
+
+  // Per-subordinate logic
+  // {{{
+  for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_subordinate_monitor
+
+    logic ar_is_shareable;
+    logic rack_fifo_full;
+    rack_fifo_entry_t rack_fifo_wdata;
+    rack_fifo_entry_t rack_fifo_rdata;
+    logic rack_fifo_push;
+    logic rack_fifo_pop;
+
+    assign ar_is_shareable = ace_ar_is_shareable( // classified on the incoming (pre-monitor) AR
+      subordinate_req_i[s].ar.bar[0],
+      subordinate_req_i[s].ar.domain,
+      subordinate_req_i[s].ar.snoop
+    );
+
+    always_comb begin : ar_comb
+      // Input request --> exclusive monitor
+      `ACE_SET_AR_STRUCT(subordinate_ar[s], subordinate_req_i[s].ar)
+      subordinate_ar_valid[s] = subordinate_req_i[s].ar_valid;
+      subordinate_resp_o[s].ar_ready = subordinate_ar_ready[s];
+
+      // Exclusive monitor --> mux
+      `ACE_SET_AR_STRUCT(subordinate_req[s].ar, exclusive_monitor_ar[s])
+      subordinate_req[s].ar_valid = exclusive_monitor_ar_valid[s];
+      exclusive_monitor_ar_ready[s] = subordinate_resp[s].ar_ready;
+
+      if (ar_is_shareable && shareable_stall_i) begin // overrides the two handshake assignments above
+        subordinate_req[s].ar_valid = 1'b0;
+        exclusive_monitor_ar_ready[s] = 1'b0;
+      end
+    end
+
+    always_comb begin : r_comb
+      // Input request <-- exclusive monitor
+      `ACE_SET_R_STRUCT(subordinate_resp_o[s].r, subordinate_r[s])
+      subordinate_resp_o[s].r_valid = subordinate_r_valid[s];
+      subordinate_r_ready[s] = subordinate_req_i[s].r_ready;
+
+      // Exclusive monitor <-- mux
+      `ACE_SET_R_STRUCT(exclusive_monitor_r[s], subordinate_resp[s].r)
+      exclusive_monitor_r_valid[s] = subordinate_resp[s].r_valid;
+      subordinate_req[s].r_ready = exclusive_monitor_r_ready[s];
+
+      if (rack_fifo_full) begin // NOTE(review): gates only the mux-side R stream; confirm R beats generated inside the monitor cannot push into a full FIFO
+        exclusive_monitor_r_valid[s] = 1'b0;
+        subordinate_req[s].r_ready = 1'b0;
+      end
+    end
+
+    always_comb begin : aw_comb
+      // Input request --> mux
+      `ACE_SET_AW_STRUCT(subordinate_req[s].aw, subordinate_req_i[s].aw)
+      subordinate_req[s].aw_valid = subordinate_req_i[s].aw_valid;
+      subordinate_resp_o[s].aw_ready = subordinate_resp[s].aw_ready;
+    end
+
+    always_comb begin : w_comb
+      // Input request --> mux
+      `AXI_SET_W_STRUCT(subordinate_req[s].w, subordinate_req_i[s].w)
+      subordinate_req[s].w_valid = subordinate_req_i[s].w_valid;
+      subordinate_resp_o[s].w_ready = subordinate_resp[s].w_ready;
+    end
+
+    always_comb begin : b_comb
+      // Input request <-- mux
+      `AXI_SET_B_STRUCT(subordinate_resp_o[s].b, subordinate_resp[s].b)
+      subordinate_resp_o[s].b_valid = subordinate_resp[s].b_valid;
+      subordinate_req[s].b_ready = subordinate_req_i[s].b_ready;
+    end
+
+
+    // The xACK signal is used to extend the lifetime of
+    // a transaction beyond the last R handshake.
+    // Since ACE uses xACK signals to trigger many events
+    // and xACK signals enforce FIFO ordering, a plain FIFO
+    // can be used to push and pop relevant metadata between
+    // a channel response and the associated xACK
+    assign rack_fifo_push = // last R beat accepted at the subordinate port
+      subordinate_resp_o[s].r_valid && subordinate_req_i[s].r_ready && subordinate_resp_o[s].r.last;
+
+    assign rack_fifo_wdata = '{ // NOTE(review): scoreboard inputs are not per-port; presumably valid in the push cycle -- confirm
+      tid: scoreboard_dealloc_entry_i,
+      dealloc: scoreboard_dealloc_hit_i && !exclusive_monitor_sc_fail[s],
+      exclusive: exclusive_monitor_id_hit[s] && !exclusive_monitor_sc_fail[s]
+    };
+
+    assign rack_fifo_pop = subordinate_rack_i[s];
+
+    fifo_v3 #(
+      .FALL_THROUGH (1'b0),
+      .DEPTH (2),
+      .dtype (rack_fifo_entry_t)
+    ) u_rack_fifo (
+      .clk_i,
+      .rst_ni,
+      .flush_i (1'b0),
+      .testmode_i (1'b0),
+      .usage_o (),
+      .empty_o (),
+      .full_o (rack_fifo_full),
+      .data_i (rack_fifo_wdata),
+      .push_i (rack_fifo_push),
+      .data_o (rack_fifo_rdata),
+      .pop_i (rack_fifo_pop)
+    );
+
+    assign scoreboard_dealloc_o[s] = subordinate_rack_i[s] && rack_fifo_rdata.dealloc; // release the entry on RACK
+    assign scoreboard_dealloc_entry_o[s] = rack_fifo_rdata.tid;
+    assign exclusive_monitor_dealloc[s] = subordinate_rack_i[s] && rack_fifo_rdata.exclusive; // drives the monitor's dealloc_i on RACK
+  end
+
+  // ACE exclusive monitor as prescribed in the specs
+  ccu_exclusive_monitor #(
+    .ccuCfg (ccuCfg),
+    .ccu_ace_ar_t (ccu_ace_subordinate_ar_t),
+    .ccu_ace_r_t (ccu_ace_subordinate_r_t)
+  ) u_ccu_exclusive_monitor (
+    .clk_i,
+    .rst_ni,
+    .dealloc_i (exclusive_monitor_dealloc),
+    .sc_fail_o (exclusive_monitor_sc_fail),
+    .r_id_hit_o (exclusive_monitor_id_hit),
+    .ar_i (subordinate_ar),
+    .ar_valid_i (subordinate_ar_valid),
+    .ar_ready_o (subordinate_ar_ready),
+    .r_o (subordinate_r),
+    .r_valid_o (subordinate_r_valid),
+    .r_ready_i (subordinate_r_ready),
+    .ar_o (exclusive_monitor_ar),
+    .ar_valid_o (exclusive_monitor_ar_valid),
+    .ar_ready_i (exclusive_monitor_ar_ready),
+    .r_i (exclusive_monitor_r),
+    .r_valid_i (exclusive_monitor_r_valid),
+    .r_ready_o (exclusive_monitor_r_ready)
+  );
+  // }}}
+
+  // Point of Serialization (PoS): all subordinate ports are merged by one axi_mux
+  // {{{
+  axi_mux #(
+    .SlvAxiIDWidth (ccuCfg.u.axiSubordinateIdWidth),
+    .slv_aw_chan_t (ccu_ace_subordinate_aw_t),
+    .mst_aw_chan_t (ccu_ace_manager_aw_t),
+    .w_chan_t (ccu_w_t),
+    .slv_b_chan_t (ccu_ace_subordinate_b_t),
+    .mst_b_chan_t (ccu_ace_manager_b_t),
+    .slv_ar_chan_t (ccu_ace_subordinate_ar_t),
+    .mst_ar_chan_t (ccu_ace_manager_ar_t),
+    .slv_r_chan_t (ccu_ace_subordinate_r_t),
+    .mst_r_chan_t (ccu_ace_manager_r_t),
+    .slv_req_t (ccu_ace_subordinate_req_t),
+    .slv_resp_t (ccu_ace_subordinate_resp_t),
+    .mst_req_t (ccu_ace_manager_req_t),
+    .mst_resp_t (ccu_ace_manager_resp_t),
+    .NoSlvPorts (ccuCfg.u.numSubordinates),
+    .MaxWTrans (ccuCfg.u.numWriteTransactions),
+    .FallThrough (1'b1),
+    .SpillAw (1'b1),
+    .SpillW (1'b1),
+    .SpillB (1'b1),
+    .SpillAr (1'b0),
+    .SpillR (1'b0)
+  ) u_subordinate_mux (
+    .clk_i,
+    .rst_ni,
+    .test_i (1'b0),
+    .slv_reqs_i (subordinate_req),
+    .slv_resps_o (subordinate_resp),
+    .mst_req_o (manager_req_o),
+    .mst_resp_i (manager_resp_i)
+  );
+  // }}}
+
+  // Scoreboard dealloc check: raised on the last R beat handshake at the manager port
+  // {{{
+  assign scoreboard_dealloc_check_o =
+    manager_resp_i.r_valid && manager_req_o.r_ready && manager_resp_i.r.last;
+  assign scoreboard_dealloc_id_o = manager_resp_i.r.id;
+  // }}}
+endmodule
diff --git a/src/ccu/ccu_pkg.sv b/src/ccu/ccu_pkg.sv
new file mode 100644
index 0000000..04f635f
--- /dev/null
+++ b/src/ccu/ccu_pkg.sv
@@ -0,0 +1,83 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+package ccu_pkg; // CCU configuration types and the helper that derives internal parameters
+
+  typedef struct packed { // parameters chosen by the integrator
+    // Number of subordinate ports (i.e. coherent managers)
+    int unsigned numSubordinates;
+    // Number of shareable simultaneous inflight transactions
+    int unsigned numShareableTransactions;
+    // Number of simultaneous write transactions
+    int unsigned numWriteTransactions;
+    // Number of simultaneous inflight snoop transactions
+    int unsigned numSnoopTransactions;
+    // AXI/ACE parameters (widths in bits)
+    int unsigned axiAddressWidth;
+    int unsigned axiDataWidth;
+    int unsigned axiUserWidth;
+    int unsigned axiSubordinateIdWidth;
+    // Cache parameters (cacheline width in bits)
+    int unsigned cachelineWidth;
+    // LR/SC reservation buffer parameters
+    /* TODO */
+    // LSB address bit used for hazard checks (inclusive)
+    int unsigned addressCheckLsb;
+    // MSB address bit used for hazard checks (inclusive)
+    int unsigned addressCheckMsb;
+  } ccu_user_config_t;
+
+  typedef struct packed { // user parameters plus everything ccu_build_cfg derives from them
+    // User parameters
+    ccu_user_config_t u;
+    // Derived internal parameters
+    // Subordinate index width ($clog2 of the port count; 0 for a single port)
+    int unsigned subordinateIndexWidth;
+    // ID width internal to the CCU (subordinate ID plus port index)
+    int unsigned axiCcuIdWidth;
+    // ID width of the manager interface of the CCU (internal ID plus one bit)
+    int unsigned axiManagerIdWidth;
+    // Byte index in cacheline
+    int unsigned cachelineByteIndexWidth;
+    // Cacheline address minus offset
+    int unsigned numLineWidth;
+    // Write transaction index width
+    int unsigned writeTransactionIndexWidth;
+    // Number of transfers for a single cacheline
+    int unsigned cachelineAxiTransfers;
+    // Transaction index width
+    int unsigned transactionIndexWidth;
+    // AXI data size (AxSIZE encoding: log2 of the bytes per beat)
+    int unsigned axiDataSize;
+    // Address slice width used for hazard checks
+    int unsigned addressCheckWidth;
+  } ccu_config_t;
+
+  function automatic ccu_config_t ccu_build_cfg(ccu_user_config_t u); // returns u plus all derived fields
+    ccu_config_t p;
+
+    p.u = u;
+
+    p.subordinateIndexWidth = $clog2(u.numSubordinates);
+    p.axiCcuIdWidth = u.axiSubordinateIdWidth + p.subordinateIndexWidth;
+    p.axiManagerIdWidth = p.axiCcuIdWidth + 1;
+    p.cachelineByteIndexWidth = u.cachelineWidth > 8 ? $clog2(u.cachelineWidth / 8) : 1;
+    p.numLineWidth = u.axiAddressWidth - p.cachelineByteIndexWidth;
+    p.writeTransactionIndexWidth = u.numWriteTransactions > 1 ? $clog2(u.numWriteTransactions) : 1;
+    p.cachelineAxiTransfers = u.cachelineWidth / u.axiDataWidth;
+    p.transactionIndexWidth = u.numShareableTransactions > 1 ? $clog2(u.numShareableTransactions) : 1;
+    p.axiDataSize = u.axiDataWidth > 8 ? $clog2(u.axiDataWidth / 8) : 0; // was "(... : 1) - 1", one too small for every bus wider than 8 bits
+    p.addressCheckWidth = u.addressCheckMsb - u.addressCheckLsb + 1;
+
+    return p;
+  endfunction
+
+endpackage
diff --git a/src/ccu/ccu_read_engine.sv b/src/ccu/ccu_read_engine.sv
new file mode 100644
index 0000000..2584d9b
--- /dev/null
+++ b/src/ccu/ccu_read_engine.sv
@@ -0,0 +1,108 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+`include "axi/assign.svh"
+`include "ace/convert.svh"
+
+// Read engine: forwards AR requests to the downstream AXI port unless the
+// address check reports a hit, and merges the downstream R stream with the
+// R stream of the snoop pipeline into a single ACE R stream.
+module ccu_read_engine
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter ccu_config_t ccuCfg = '{default: '0},
+  parameter type ccu_axi_ar_t = logic,
+  parameter type ccu_ace_r_t = logic,
+  parameter type ccu_axi_r_t = logic
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  input logic ar_valid_i,
+  output logic ar_ready_o,
+  input ccu_axi_ar_t ar_i,
+
+  output logic ar_addr_check_o,
+  input logic ar_addr_hit_i,
+  output logic [ccuCfg.addressCheckWidth-1:0] ar_addr_slice_o,
+
+  input logic snoop_pipeline_r_valid_i,
+  output logic snoop_pipeline_r_ready_o,
+  input ccu_ace_r_t snoop_pipeline_r_i,
+
+  output logic r_valid_o,
+  input logic r_ready_i,
+  output ccu_ace_r_t r_o,
+
+  output logic ar_valid_o,
+  input logic ar_ready_i,
+  output ccu_axi_ar_t ar_o,
+  input logic r_valid_i,
+  output logic r_ready_o,
+  input ccu_axi_r_t r_i
+);
+
+// AR channel
+// {{{
+  // Every valid request triggers an address check on its hazard slice
+  assign ar_addr_slice_o = ar_i.addr[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb];
+  assign ar_addr_check_o = ar_valid_i;
+
+  // The handshake is blocked in both directions while the check hits
+  assign ar_valid_o = ar_valid_i && !ar_addr_hit_i;
+  assign ar_ready_o = ar_ready_i && !ar_addr_hit_i;
+
+  `AXI_ASSIGN_AR_STRUCT(ar_o, ar_i)
+// }}}
+
+// R channel
+// {{{
+  // Index 1: downstream R stream, index 0: snoop pipeline R stream
+  ccu_ace_r_t mem_r;
+  logic [1:0] arb_req;
+  logic [1:0] arb_gnt;
+  logic [1:0] src_mask_d;
+  logic [1:0] src_mask_q;
+  logic r_beat_done;
+
+  `AXI_TO_ACE_ASSIGN_R_STRUCT(mem_r, r_i)
+
+  // A masked source neither requests nor gets a ready
+  assign arb_req = {r_valid_i, snoop_pipeline_r_valid_i} & ~src_mask_q;
+  assign {r_ready_o, snoop_pipeline_r_ready_o} = arb_gnt & ~src_mask_q;
+
+  assign r_beat_done = r_valid_o && r_ready_i;
+
+  // After a non-last beat, every source that did not transfer that beat is
+  // masked; the mask is released together with the last beat.
+  always_comb begin : mask_comb
+    src_mask_d = src_mask_q;
+    if (r_beat_done) begin
+      if (r_o.last) begin
+        src_mask_d = '0;
+      end else begin
+        src_mask_d = ~(arb_req & arb_gnt);
+      end
+    end
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      src_mask_q <= '0;
+    end else begin
+      src_mask_q <= src_mask_d;
+    end
+  end
+
+  rr_arb_tree #(
+    .NumIn     (2),
+    .DataType  (ccu_ace_r_t),
+    .ExtPrio   (1'b0),
+    .AxiVldRdy (1'b1),
+    .LockIn    (1'b1),
+    .FairArb   (1'b1)
+  ) u_r_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .rr_i    ('0),
+    .req_i   (arb_req),
+    .gnt_o   (arb_gnt),
+    .data_i  ({mem_r, snoop_pipeline_r_i}),
+    .req_o   (r_valid_o),
+    .gnt_i   (r_ready_i),
+    .data_o  (r_o),
+    .idx_o   ()
+  );
+// }}}
+endmodule
diff --git a/src/ccu/ccu_replay.sv b/src/ccu/ccu_replay.sv
new file mode 100644
index 0000000..0c62d7a
--- /dev/null
+++ b/src/ccu/ccu_replay.sv
@@ -0,0 +1,33 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Replay list placeholder: it always reports itself as full and ignores
+// its clock, reset and allocation inputs.
+module ccu_replay
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter ccu_config_t ccuCfg = '{default: '0}
+
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  input logic replay_alloc_i,
+  output logic replay_full_o
+);
+
+/*
+Stub implementation of replay
+TODO: actual implementation
+*/
+
+// Permanently full
+assign replay_full_o = 1'b1;
+
+endmodule
diff --git a/src/ccu/ccu_scoreboard.sv b/src/ccu/ccu_scoreboard.sv
new file mode 100644
index 0000000..aed6e85
--- /dev/null
+++ b/src/ccu/ccu_scoreboard.sv
@@ -0,0 +1,118 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Scoreboard of inflight transactions.
+//
+// Each entry stores the hazard-check address slice and the ID of one
+// transaction. Allocation picks the lowest-indexed free entry; the address
+// of an incoming request can be checked against all valid entries
+// (`alloc_hit_o`), and an ID can be looked up to find the entry to release
+// (`dealloc_hit_o` / `dealloc_hit_entry_o`). Entries are released through
+// one deallocation port per subordinate.
+module ccu_scoreboard
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter ccu_config_t ccuCfg = '{default: '0},
+  localparam int unsigned scoreboardEntries = ccuCfg.u.numShareableTransactions,
+  localparam int unsigned scoreboardEntryIndexWidth = ccuCfg.transactionIndexWidth
+
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  // All entries are valid
+  output logic full_o,
+
+  // Address check and allocation
+  input logic alloc_check_i,
+  input logic alloc_i,
+  input logic [ccuCfg.u.axiAddressWidth-1:0] alloc_addr_i,
+  input logic [ccuCfg.axiCcuIdWidth-1:0] alloc_id_i,
+  output logic alloc_hit_o,
+
+  // ID lookup
+  input logic dealloc_check_i,
+  input logic [ccuCfg.axiCcuIdWidth-1:0] dealloc_id_i,
+  output logic dealloc_hit_o,
+  output logic [scoreboardEntryIndexWidth-1:0] dealloc_hit_entry_o,
+
+  // Per-subordinate deallocation ports
+  input logic [ccuCfg.u.numSubordinates-1:0] dealloc_i,
+  input logic [ccuCfg.u.numSubordinates-1:0][scoreboardEntryIndexWidth-1:0] dealloc_entry_i
+);
+
+typedef struct packed {
+  logic [ccuCfg.addressCheckWidth-1:0] addr;
+  logic [ccuCfg.axiCcuIdWidth-1:0] id;
+} scoreboard_entry_t;
+
+logic [ccuCfg.addressCheckWidth-1:0] alloc_addr_slice;
+
+logic [scoreboardEntries-1:0] valid_q;
+logic [scoreboardEntries-1:0] valid_d;
+scoreboard_entry_t [scoreboardEntries-1:0] entry_q;
+scoreboard_entry_t [scoreboardEntries-1:0] entry_d;
+logic [scoreboardEntries-1:0] address_hit;
+logic [scoreboardEntries-1:0] dealloc_id_hit;
+logic [scoreboardEntryIndexWidth-1:0] alloc_entry;
+
+assign alloc_addr_slice = alloc_addr_i[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb];
+
+assign alloc_hit_o = alloc_check_i && |(valid_q & address_hit);
+assign dealloc_hit_o = dealloc_check_i && |(valid_q & dealloc_id_hit);
+
+// Lowest-indexed free entry; defaults to 0 when the scoreboard is full
+always_comb begin : alloc_entry_comb
+  alloc_entry = '0;
+  for (int unsigned e = 0; e < scoreboardEntries; e++) begin
+    if (!valid_q[e]) begin
+      alloc_entry = scoreboardEntryIndexWidth'(e);
+      break;
+    end
+  end
+end
+
+assign full_o = &valid_q;
+
+for (genvar e = 0; e < scoreboardEntries; e++) begin : gen_entry
+  logic [ccuCfg.subordinateIndexWidth-1:0] subordinate_index;
+  logic alloc;
+  logic dealloc;
+  // The subordinate index occupies the upper bits of the stored ID
+  assign subordinate_index = entry_q[e].id[ccuCfg.axiCcuIdWidth-1-:ccuCfg.subordinateIndexWidth];
+  // Only a free entry can be allocated: when the scoreboard is full,
+  // `alloc_entry` defaults to 0 and entry 0 must not be overwritten.
+  assign alloc = alloc_i && !valid_q[e] && alloc_entry == e;
+  // Only a valid entry can be released. Together with the qualifier on
+  // `alloc` this keeps the two items of the `unique case` below mutually
+  // exclusive.
+  assign dealloc = valid_q[e] && dealloc_i[subordinate_index] && dealloc_entry_i[subordinate_index] == e;
+  assign address_hit[e] = alloc_addr_slice == entry_q[e].addr;
+  assign dealloc_id_hit[e] = dealloc_id_i == entry_q[e].id;
+
+  always_comb begin : entry_comb
+    valid_d[e] = valid_q[e];
+    entry_d[e] = entry_q[e];
+
+    unique case (1'b1)
+      alloc: begin
+        valid_d[e] = 1'b1;
+        entry_d[e].addr = alloc_addr_slice;
+        entry_d[e].id = alloc_id_i;
+      end
+      dealloc: begin
+        valid_d[e] = 1'b0;
+      end
+      default: ;
+    endcase
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      valid_q[e] <= 1'b0;
+      entry_q[e] <= '0;
+    end else begin
+      valid_q[e] <= valid_d[e];
+      entry_q[e] <= entry_d[e];
+    end
+  end
+end
+
+// Index of the valid entry matching the looked-up ID
+onehot_to_bin #(
+  .ONEHOT_WIDTH (scoreboardEntries)
+) u_onehot_to_bin (
+  .onehot (dealloc_id_hit & valid_q),
+  .bin (dealloc_hit_entry_o)
+);
+
+// An allocation request while full is dropped by the logic above; flag it
+// in simulation since the requester is expected to stall instead.
+`ifndef SYNTHESIS
+// pragma translate_off
+assert property (@(posedge clk_i) disable iff (!rst_ni) !(alloc_i && full_o))
+  else $error("ccu_scoreboard: allocation requested while the scoreboard is full");
+// pragma translate_on
+`endif
+
+endmodule
diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv
new file mode 100644
index 0000000..dde435e
--- /dev/null
+++ b/src/ccu/ccu_snoop_pipeline.sv
@@ -0,0 +1,608 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+`include "axi/assign.svh"
+
+// AR-driven snoop pipeline.
+//
+// Stage 0 takes an ACE AR request, derives the snoop request and the set of
+// snooped subordinates, and pushes the request into the AC FIFO (snooping
+// requests only) and into the stage 1 FIFO. Stage 1 joins the request with
+// the CR responses of the snooped subordinates and dispatches it to the
+// read engine (memory read), to the write engine (writeback) and/or to the
+// CD engine. The CD engine collects the CD data and produces the R response
+// and the writeback W data.
+module ccu_snoop_pipeline
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter ccu_config_t ccuCfg = '{default: '0},
+  parameter type ccu_ace_ar_t = logic,
+  parameter type ccu_ace_r_t = logic,
+  parameter type ccu_snoop_ac_t = logic,
+  parameter type ccu_snoop_cr_t = logic,
+  parameter type ccu_snoop_cd_t = logic,
+  parameter type ccu_w_t = logic,
+  parameter type ccu_axi_ar_t = logic,
+  parameter type ccu_axi_aw_t = logic,
+  parameter type domain_map_t = logic
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  input domain_map_t [ccuCfg.u.numSubordinates-1:0] domain_map_i,
+
+  input ccu_ace_ar_t ar_i,
+  input logic ar_valid_i,
+  output logic ar_ready_o,
+
+  output logic scoreboard_alloc_check_o,
+  output logic scoreboard_alloc_o,
+  input logic scoreboard_alloc_hit_i,
+  input logic scoreboard_full_i,
+
+  output logic replay_alloc_o,
+  input logic replay_full_i,
+
+  output logic [ccuCfg.u.numSubordinates-1:0] ac_valid_o,
+  input logic [ccuCfg.u.numSubordinates-1:0] ac_ready_i,
+  output ccu_snoop_ac_t [ccuCfg.u.numSubordinates-1:0] ac_o,
+
+  input logic [ccuCfg.u.numSubordinates-1:0] cr_valid_i,
+  output logic [ccuCfg.u.numSubordinates-1:0] cr_ready_o,
+  input ccu_snoop_cr_t [ccuCfg.u.numSubordinates-1:0] cr_i,
+
+  input logic [ccuCfg.u.numSubordinates-1:0] cd_valid_i,
+  output logic [ccuCfg.u.numSubordinates-1:0] cd_ready_o,
+  input ccu_snoop_cd_t [ccuCfg.u.numSubordinates-1:0] cd_i,
+
+  output logic write_engine_aw_valid_o,
+  input logic write_engine_aw_ready_i,
+  output ccu_axi_aw_t write_engine_aw_o,
+  output logic write_engine_w_valid_o,
+  input logic write_engine_w_ready_i,
+  output ccu_w_t write_engine_w_o,
+
+  output logic read_engine_ar_valid_o,
+  input logic read_engine_ar_ready_i,
+  output ccu_axi_ar_t read_engine_ar_o,
+  output logic read_engine_r_valid_o,
+  input logic read_engine_r_ready_i,
+  output ccu_ace_r_t read_engine_r_o
+);
+
+// AC channel
+// {{{
+  typedef struct packed {
+    ccu_snoop_ac_t ac;
+    logic [ccuCfg.u.numSubordinates-1:0] sel;
+  } ac_fifo_entry_t;
+
+  ccu_snoop_ac_t ac;
+  logic [ccuCfg.u.numSubordinates-1:0] ac_sel;
+  logic ac_valid;
+  logic ac_ready;
+  ac_fifo_entry_t ac_fifo_wdata;
+  ac_fifo_entry_t ac_fifo_rdata;
+  logic ac_fifo_valid;
+  logic ac_fifo_ready;
+
+  assign ac_fifo_wdata = '{
+    ac: ac,
+    sel: ac_sel
+  };
+
+  stream_fifo #(
+    .FALL_THROUGH (1'b0),
+    .DEPTH (ccuCfg.u.numSnoopTransactions),
+    .T (ac_fifo_entry_t)
+  ) u_ac_fifo (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .testmode_i (1'b0),
+    .usage_o (),
+    .data_i (ac_fifo_wdata),
+    .valid_i (ac_valid),
+    .ready_o (ac_ready),
+    .data_o (ac_fifo_rdata),
+    .valid_o (ac_fifo_valid),
+    .ready_i (ac_fifo_ready)
+  );
+
+  // The same snoop request is presented to every selected subordinate
+  assign ac_o = {ccuCfg.u.numSubordinates{ac_fifo_rdata.ac}};
+
+  stream_fork_dynamic #(
+    .N_OUP (ccuCfg.u.numSubordinates)
+  ) u_ac_fifo_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i (ac_fifo_valid),
+    .ready_o (ac_fifo_ready),
+    .sel_i (ac_fifo_rdata.sel),
+    .sel_valid_i (1'b1),
+    .sel_ready_o (),
+    .valid_o (ac_valid_o),
+    .ready_i (ac_ready_i)
+  );
+// }}}
+
+// CR channel
+// {{{
+  logic [ccuCfg.u.numSubordinates-1:0] cr_fifo_valid;
+  logic [ccuCfg.u.numSubordinates-1:0] cr_fifo_ready;
+  ccu_snoop_cr_t [ccuCfg.u.numSubordinates-1:0] cr_fifo_rdata;
+
+  for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_cr_fifo
+    stream_fifo #(
+      .FALL_THROUGH (1'b0),
+      .DEPTH (ccuCfg.u.numSnoopTransactions),
+      .T (ccu_snoop_cr_t)
+    ) u_cr_fifo (
+      .clk_i,
+      .rst_ni,
+      .flush_i (1'b0),
+      .testmode_i (1'b0),
+      .usage_o (),
+      .data_i (cr_i[s]),
+      .valid_i (cr_valid_i[s]),
+      .ready_o (cr_ready_o[s]),
+      .data_o (cr_fifo_rdata[s]),
+      .valid_o (cr_fifo_valid[s]),
+      .ready_i (cr_fifo_ready[s])
+    );
+  end
+// }}}
+
+// CD channel
+// {{{
+  logic [ccuCfg.u.numSubordinates-1:0] cd_fifo_valid;
+  logic [ccuCfg.u.numSubordinates-1:0] cd_fifo_ready;
+  ccu_snoop_cd_t [ccuCfg.u.numSubordinates-1:0] cd_fifo_rdata;
+
+  for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_cd_fifo
+    stream_fifo #(
+      .FALL_THROUGH (1'b0),
+      .DEPTH (ccuCfg.u.numSnoopTransactions),
+      .T (ccu_snoop_cd_t)
+    ) u_cd_fifo (
+      .clk_i,
+      .rst_ni,
+      .flush_i (1'b0),
+      .testmode_i (1'b0),
+      .usage_o (),
+      .data_i (cd_i[s]),
+      .valid_i (cd_valid_i[s]),
+      .ready_o (cd_ready_o[s]),
+      .data_o (cd_fifo_rdata[s]),
+      .valid_o (cd_fifo_valid[s]),
+      .ready_i (cd_fifo_ready[s])
+    );
+  end
+// }}}
+
+
+// Stage 0
+// {{{
+  logic [ccuCfg.subordinateIndexWidth-1:0] subordinate_ar_index;
+  logic ar_fork_valid;
+  logic ar_fork_ready;
+  logic ar_is_read_no_snoop;
+  acsnoop_t ac_snoop;
+  logic stage0_valid;
+  logic stage0_ready;
+
+  assign ac_snoop = ace_ar_acsnoop_map(
+    ar_i.bar[0],
+    ar_i.domain,
+    ar_i.snoop
+  );
+
+  assign ar_is_read_no_snoop = ace_is_read_no_snoop(
+    ar_i.bar[0],
+    ar_i.domain,
+    ar_i.snoop
+  );
+
+  // The index of the issuing subordinate sits above the subordinate-side ID
+  assign subordinate_ar_index = ar_i.id[ccuCfg.u.axiSubordinateIdWidth+:ccuCfg.subordinateIndexWidth];
+
+  // Set of subordinates to snoop, taken from the domain map of the initiator
+  always_comb begin : ace_sel_comb
+    unique case (ar_i.domain)
+      NonShareable : ac_sel = '0;
+      InnerShareable: ac_sel = domain_map_i[subordinate_ar_index].inner;
+      OuterShareable: ac_sel = domain_map_i[subordinate_ar_index].outer;
+      default: ac_sel = ~domain_map_i[subordinate_ar_index].initiator;
+    endcase
+  end
+
+  assign ac = '{
+    addr: axi_pkg::aligned_addr(ar_i.addr, ccuCfg.cachelineByteIndexWidth),
+    snoop: ac_snoop,
+    prot: '0
+  };
+
+  assign scoreboard_alloc_check_o = !ar_is_read_no_snoop && ar_valid_i;
+  assign replay_alloc_o = !replay_full_i && scoreboard_alloc_hit_i;
+
+  // On a scoreboard hit the request is not forwarded to the pipeline; it is
+  // only accepted if the replay list has room for it.
+  always_comb begin : ar_stall_comb
+    ar_fork_valid = ar_valid_i;
+    ar_ready_o = ar_fork_ready;
+
+    if (scoreboard_alloc_hit_i) begin
+      ar_fork_valid = 1'b0;
+      ar_ready_o = !replay_full_i;
+    end
+  end
+
+  assign scoreboard_alloc_o = !ar_is_read_no_snoop && ar_fork_valid && ar_fork_ready;
+
+  stream_fork_dynamic #(
+    .N_OUP (2)
+  ) u_ac_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i (ar_fork_valid),
+    .ready_o (ar_fork_ready),
+    .sel_i ({!ar_is_read_no_snoop, 1'b1}),
+    // Only requests that allocate a scoreboard entry (see
+    // `scoreboard_alloc_o`) have to wait while the scoreboard is full.
+    // The previous condition `!ar_is_read_no_snoop || !scoreboard_full_i`
+    // stalled exactly the opposite set of requests.
+    .sel_valid_i (ar_is_read_no_snoop || !scoreboard_full_i),
+    .sel_ready_o (),
+    .valid_o ({ac_valid, stage0_valid}),
+    .ready_i ({ac_ready, stage0_ready})
+  );
+// }}}
+
+// Stage 1
+// {{{
+  typedef struct packed {
+    ccu_ace_ar_t ar;
+    logic [ccuCfg.u.numSubordinates-1:0] sel;
+  } stage1_fifo_entry_t;
+
+  logic stage1_fifo_valid;
+  logic stage1_fifo_ready;
+  stage1_fifo_entry_t stage1_fifo_wdata;
+  stage1_fifo_entry_t stage1_fifo_rdata;
+  logic accepts_dirty;
+  logic accepts_shared;
+  logic is_clean;
+  ccu_snoop_cr_t cr;
+  logic [ccuCfg.u.numSubordinates-1:0] cd_data_transfer;
+  logic engine_fork_valid;
+  logic engine_fork_ready;
+  logic read_engine_sel;
+  logic write_engine_sel;
+  logic cd_engine_sel;
+  logic cd_engine_forward_to_read;
+  logic cd_engine_forward_to_write;
+  logic cd_engine_ack_to_read;
+  logic cd_engine_valid;
+  logic cd_engine_ready;
+  // Previously implicit nets, now declared explicitly
+  logic cd_engine_resp_shared;
+  logic cd_engine_resp_dirty;
+
+  assign stage1_fifo_wdata = '{
+    ar : ar_i,
+    sel: ac_sel
+  };
+
+  stream_fifo #(
+    .FALL_THROUGH (1'b0),
+    .DEPTH (ccuCfg.u.numSnoopTransactions),
+    .T (stage1_fifo_entry_t)
+  ) u_stage1_fifo (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .testmode_i (1'b0),
+    .usage_o (),
+    .data_i (stage1_fifo_wdata),
+    .valid_i (stage0_valid),
+    .ready_o (stage0_ready),
+    .data_o (stage1_fifo_rdata),
+    .valid_o (stage1_fifo_valid),
+    .ready_i (stage1_fifo_ready)
+  );
+
+  // Wait for the request and for the CR response of every snooped subordinate
+  stream_join_dynamic #(
+    .N_INP (ccuCfg.u.numSubordinates+1)
+  ) u_cr_join (
+    .inp_valid_i ({cr_fifo_valid, stage1_fifo_valid}),
+    .inp_ready_o ({cr_fifo_ready, stage1_fifo_ready}),
+    .sel_i ({stage1_fifo_rdata.sel, 1'b1}),
+    .oup_valid_o (engine_fork_valid),
+    .oup_ready_i (engine_fork_ready)
+  );
+
+  // OR-reduce the CR responses of the snooped subordinates and record
+  // which of them announce a data transfer
+  always_comb begin : cr_comb
+    cr = '0;
+    cd_data_transfer = '0;
+    for (int unsigned s = 0; s < ccuCfg.u.numSubordinates; s++) begin
+      if (stage1_fifo_rdata.sel[s]) begin
+        cr = cr | cr_fifo_rdata[s];
+        cd_data_transfer[s] = cr_fifo_rdata[s].resp.DataTransfer;
+      end
+    end
+  end
+
+  assign accepts_dirty = ace_ar_accepts_dirty(
+    stage1_fifo_rdata.ar.bar[0],
+    stage1_fifo_rdata.ar.domain,
+    stage1_fifo_rdata.ar.snoop
+  );
+
+  assign accepts_shared = ace_ar_accepts_shared(
+    stage1_fifo_rdata.ar.bar[0],
+    stage1_fifo_rdata.ar.domain,
+    stage1_fifo_rdata.ar.snoop
+  );
+
+  assign is_clean = ace_ar_is_clean(
+    stage1_fifo_rdata.ar.bar[0],
+    stage1_fifo_rdata.ar.domain,
+    stage1_fifo_rdata.ar.snoop
+  );
+
+  always_comb begin : engine_sel_comb
+    read_engine_sel = 1'b0;
+    write_engine_sel = 1'b0;
+    cd_engine_sel = 1'b0;
+    cd_engine_forward_to_read = 1'b0;
+    cd_engine_forward_to_write = 1'b0;
+    cd_engine_ack_to_read = 1'b0;
+
+    case ({cr.resp.DataTransfer, is_clean})
+      // Forward the request to memory
+      2'b00: read_engine_sel = 1'b1;
+      // Send only the clean R response
+      2'b01: begin
+        cd_engine_sel = 1'b1;
+        cd_engine_ack_to_read = 1'b1;
+      end
+      // At least one snooped manager
+      // is providing data
+      default: begin
+        cd_engine_sel = 1'b1;
+        cd_engine_forward_to_read = !is_clean;
+        cd_engine_ack_to_read = is_clean;
+        if (cr.resp.PassDirty && !accepts_dirty) begin
+          // The initiator cannot accept dirty data,
+          // thus we need a writeback
+          cd_engine_forward_to_write = 1'b1;
+          write_engine_sel = 1'b1;
+        end
+      end
+    endcase
+  end
+
+  stream_fork_dynamic #(
+    .N_OUP(3)
+  ) u_engine_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i (engine_fork_valid),
+    .ready_o (engine_fork_ready),
+    .sel_i ({read_engine_sel, write_engine_sel, cd_engine_sel}),
+    .sel_valid_i(1'b1),
+    .sel_ready_o(),
+    .valid_o ({read_engine_ar_valid_o, write_engine_aw_valid_o, cd_engine_valid}),
+    .ready_i ({read_engine_ar_ready_i, write_engine_aw_ready_i, cd_engine_ready})
+  );
+
+  always_comb begin : read_engine_ar_comb
+    // ACE to AXI conversion can be done via the macro
+    `AXI_SET_AR_STRUCT(read_engine_ar_o, stage1_fifo_rdata.ar)
+  end
+
+  always_comb begin : write_engine_aw_comb
+    // Derive an AW struct to issue writebacks from the
+    // original AR request
+    // The additional ID bit is used to uniquely identify
+    // writeback operations
+    // TODO: this might be overkill?
+    write_engine_aw_o.id = {1'b1, stage1_fifo_rdata.ar.id};
+    // Cacheline-aligned address of the request. The address field has to
+    // be passed here: the whole AR struct used to be passed instead, which
+    // silently converts the packed struct to an integer.
+    write_engine_aw_o.addr = axi_pkg::aligned_addr(stage1_fifo_rdata.ar.addr, ccuCfg.cachelineByteIndexWidth);
+    write_engine_aw_o.len = ccuCfg.cachelineAxiTransfers - 1;
+    write_engine_aw_o.size = ccuCfg.axiDataSize;
+    write_engine_aw_o.burst = axi_pkg::BURST_WRAP;
+    write_engine_aw_o.lock = 1'b0;
+    // Enforce non-bufferable requirements
+    // This should fix premature B responses
+    // NOTE(review): the mask is all ones, so every cache bit is cleared,
+    // not only the bufferable one - confirm this is intended.
+    write_engine_aw_o.cache = stage1_fifo_rdata.ar.cache & ~{axi_pkg::CacheWidth{1'b1}};
+    write_engine_aw_o.prot = stage1_fifo_rdata.ar.prot;
+    write_engine_aw_o.qos = stage1_fifo_rdata.ar.qos;
+    write_engine_aw_o.region = stage1_fifo_rdata.ar.region;
+    write_engine_aw_o.atop = '0;
+    write_engine_aw_o.user = stage1_fifo_rdata.ar.user;
+  end
+
+  // Shared/dirty are only reported if the request type accepts them
+  assign cd_engine_resp_shared = cr.resp.IsShared && accepts_shared;
+  assign cd_engine_resp_dirty = cr.resp.PassDirty && accepts_dirty;
+// }}}
+
+// CD engine
+// {{{
+  localparam int unsigned cachelineTransferIndexWidth = ccuCfg.cachelineAxiTransfers > 1 ?
+    $clog2(ccuCfg.cachelineAxiTransfers) : 1;
+
+  typedef struct packed {
+    logic [ccuCfg.u.numSubordinates-1:0] sel;
+    logic forward_to_read;
+    logic forward_to_write;
+    logic ack_to_read;
+    logic resp_shared;
+    logic resp_dirty;
+    logic [ccuCfg.axiCcuIdWidth-1:0] ar_id;
+    logic [ccuCfg.u.axiUserWidth-1:0] ar_user;
+    logic ar_lock;
+  } cd_engine_fifo_entry_t;
+
+  logic cd_engine_fifo_valid;
+  logic cd_engine_fifo_ready;
+  cd_engine_fifo_entry_t cd_engine_fifo_wdata;
+  cd_engine_fifo_entry_t cd_engine_fifo_rdata;
+
+  assign cd_engine_fifo_wdata = '{
+    sel: cd_data_transfer,
+    forward_to_read: cd_engine_forward_to_read,
+    forward_to_write: cd_engine_forward_to_write,
+    ack_to_read: cd_engine_ack_to_read,
+    resp_shared: cd_engine_resp_shared,
+    resp_dirty: cd_engine_resp_dirty,
+    ar_id: stage1_fifo_rdata.ar.id,
+    ar_user: stage1_fifo_rdata.ar.user,
+    ar_lock: stage1_fifo_rdata.ar.lock
+  };
+
+  logic cd_global_counter_en;
+  logic [cachelineTransferIndexWidth-1:0] cd_global_counter_q;
+  logic cd_fork_valid;
+  logic cd_fork_ready;
+  logic r_mux_ack_valid;
+  logic r_mux_ack_ready;
+
+  logic read_engine_r_last;
+  rresp_t read_engine_r_resp;
+  logic [ccuCfg.u.numSubordinates-1:0] cd_up_to_date;
+  logic [ccuCfg.u.numSubordinates-1:0] cd_valid;
+  logic [ccuCfg.u.numSubordinates-1:0] cd_ready;
+  ccu_snoop_cd_t cd;
+  logic cd_engine_data_valid;
+  logic cd_engine_data_ready;
+  logic cd_read_engine_r_valid;
+  logic cd_read_engine_r_ready;
+
+  stream_fifo #(
+    .FALL_THROUGH (1'b0),
+    .DEPTH (ccuCfg.u.numSnoopTransactions),
+    .T (cd_engine_fifo_entry_t)
+  ) u_cd_engine_fifo (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .testmode_i (1'b0),
+    .usage_o (),
+    .data_i (cd_engine_fifo_wdata),
+    .valid_i (cd_engine_valid),
+    .ready_o (cd_engine_ready),
+    .data_o (cd_engine_fifo_rdata),
+    .valid_o (cd_engine_fifo_valid),
+    .ready_i (cd_engine_fifo_ready)
+  );
+
+  // The data branch is only ready on the last CD beat, so the FIFO entry
+  // stays at the head for the whole data transfer
+  stream_fork_dynamic #(
+    .N_OUP (2)
+  ) u_cd_engine_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i (cd_engine_fifo_valid),
+    .ready_o (cd_engine_fifo_ready),
+    .sel_i ({cd_engine_fifo_rdata.ack_to_read, |cd_engine_fifo_rdata.sel}),
+    .sel_valid_i (1'b1),
+    .sel_ready_o (),
+    .valid_o ({r_mux_ack_valid, cd_fork_valid}),
+    .ready_i ({r_mux_ack_ready, cd_fork_ready} & {1'b1, cd.last})
+  );
+
+  // Only the subordinates that announced a data transfer take part
+  assign cd_valid = cd_fifo_valid & cd_engine_fifo_rdata.sel;
+  assign cd_fifo_ready = cd_ready & cd_engine_fifo_rdata.sel;
+
+  assign cd_engine_data_valid = |(cd_valid & cd_up_to_date);
+  assign cd_global_counter_en = cd_engine_data_valid && cd_engine_data_ready;
+
+  // Counts the beats forwarded downstream
+  counter #(
+    .WIDTH (cachelineTransferIndexWidth)
+  ) u_cd_global_counter (
+    .clk_i,
+    .rst_ni,
+    .clear_i (1'b0),
+    .en_i (cd_global_counter_en),
+    .load_i (1'b0),
+    .down_i (1'b0),
+    .d_i ('0),
+    .q_o (cd_global_counter_q),
+    .overflow_o ()
+  );
+
+  for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_cd_local_counter
+
+    logic cd_local_counter_en;
+    logic [cachelineTransferIndexWidth-1:0] cd_local_counter_q;
+
+    // Counts the beats consumed from subordinate `s`
+    counter #(
+      .WIDTH (cachelineTransferIndexWidth)
+    ) u_cd_local_counter (
+      .clk_i,
+      .rst_ni,
+      .clear_i (1'b0),
+      .en_i (cd_local_counter_en),
+      .load_i (1'b0),
+      .down_i (1'b0),
+      .d_i ('0),
+      .q_o (cd_local_counter_q),
+      .overflow_o ()
+    );
+    assign cd_local_counter_en = cd_valid[s] && cd_ready[s];
+    // A subordinate whose count differs from the global one holds a beat
+    // that was already forwarded: it is consumed unconditionally
+    assign cd_up_to_date[s] = cd_local_counter_q == cd_global_counter_q;
+    assign cd_ready[s] = !cd_up_to_date[s] || cd_engine_data_ready;
+  end
+
+  // Data of the lowest-indexed subordinate presenting the current beat
+  always_comb begin : cd_mux_comb
+    cd = cd_fifo_rdata[0];
+    for (int s = 0; s < ccuCfg.u.numSubordinates; s++) begin
+      if (cd_valid[s] && cd_up_to_date[s]) begin
+        cd = cd_fifo_rdata[s];
+        break;
+      end
+    end
+  end
+
+  stream_fork_dynamic #(
+    .N_OUP (2)
+  ) u_cd_data_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i (cd_engine_data_valid),
+    .ready_o (cd_engine_data_ready),
+    .sel_i ({cd_engine_fifo_rdata.forward_to_read, cd_engine_fifo_rdata.forward_to_write}),
+    .sel_valid_i (cd_fork_valid),
+    .sel_ready_o (cd_fork_ready),
+    .valid_o ({cd_read_engine_r_valid, write_engine_w_valid_o}),
+    .ready_i ({cd_read_engine_r_ready, write_engine_w_ready_i})
+  );
+
+  always_comb begin : rresp_comb
+    read_engine_r_resp[RESP_IS_DIRTY] = cd_engine_fifo_rdata.resp_dirty;
+    read_engine_r_resp[RESP_IS_SHARED] = cd_engine_fifo_rdata.resp_shared;
+    if (cd_engine_fifo_rdata.ar_lock)
+      read_engine_r_resp[1:0] = axi_pkg::RESP_EXOKAY;
+    else
+      read_engine_r_resp[1:0] = axi_pkg::RESP_OKAY;
+  end
+
+  // Only the `r_last` field has to be multiplexed
+  stream_mux #(
+    .N_INP (2),
+    .DATA_T(logic)
+  ) u_r_mux (
+    .inp_data_i ({1'b1, cd.last}),
+    .inp_valid_i ({r_mux_ack_valid, cd_read_engine_r_valid}),
+    .inp_ready_o ({r_mux_ack_ready, cd_read_engine_r_ready}),
+    .inp_sel_i (cd_engine_fifo_rdata.ack_to_read),
+    .oup_data_o (read_engine_r_last),
+    .oup_valid_o (read_engine_r_valid_o),
+    .oup_ready_i (read_engine_r_ready_i)
+  );
+
+  assign read_engine_r_o = '{
+    id: cd_engine_fifo_rdata.ar_id,
+    data: cd.data,
+    resp: read_engine_r_resp,
+    last: read_engine_r_last,
+    user: cd_engine_fifo_rdata.ar_user
+  };
+
+  assign write_engine_w_o = '{
+    data: cd.data,
+    strb: '1,
+    last: cd.last,
+    user: cd_engine_fifo_rdata.ar_user
+  };
+// }}}
+endmodule
diff --git a/src/ccu/ccu_top.sv b/src/ccu/ccu_top.sv
new file mode 100644
index 0000000..aa550ea
--- /dev/null
+++ b/src/ccu/ccu_top.sv
@@ -0,0 +1,363 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+ +`include "axi/typedef.svh" +`include "ace/typedef.svh" +`include "axi/assign.svh" +`include "ace/assign.svh" + +module ccu_top + import ace_pkg::*; + import ccu_pkg::*; +#( + parameter ccu_config_t ccuCfg = '{default: '0}, + parameter type domain_map_t = logic, + parameter type ccu_ace_subordinate_ar_t = logic, + parameter type ccu_ace_subordinate_aw_t = logic, + parameter type ccu_w_t = logic, + parameter type ccu_ace_subordinate_r_t = logic, + parameter type ccu_ace_subordinate_b_t = logic, + parameter type ccu_ace_subordinate_req_t = logic, + parameter type ccu_ace_subordinate_resp_t = logic, + parameter type ccu_axi_manager_ar_t = logic, + parameter type ccu_axi_manager_aw_t = logic, + parameter type ccu_axi_manager_r_t = logic, + parameter type ccu_axi_manager_b_t = logic, + parameter type ccu_axi_manager_req_t = logic, + parameter type ccu_axi_manager_resp_t = logic, + parameter type ccu_snoop_ac_t = logic, + parameter type ccu_snoop_cr_t = logic, + parameter type ccu_snoop_cd_t = logic, + parameter type ccu_snoop_req_t = logic, + parameter type ccu_snoop_resp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + + input domain_map_t [ccuCfg.u.numSubordinates-1:0] domain_map_i, + input ccu_ace_subordinate_req_t [ccuCfg.u.numSubordinates-1:0] subordinate_req_i, + output ccu_ace_subordinate_resp_t [ccuCfg.u.numSubordinates-1:0] subordinate_resp_o, + input logic [ccuCfg.u.numSubordinates-1:0] subordinate_rack_i, + input logic [ccuCfg.u.numSubordinates-1:0] subordinate_wack_i, + output ccu_snoop_req_t [ccuCfg.u.numSubordinates-1:0] snoop_req_o, + input ccu_snoop_resp_t [ccuCfg.u.numSubordinates-1:0] snoop_resp_i, + output ccu_axi_manager_req_t manager_req_o, + input ccu_axi_manager_resp_t manager_resp_i +); + +// AXI/ACE typedefs +// {{{ + typedef logic [ccuCfg.axiCcuIdWidth-1:0] ccu_id_t; + typedef logic [ccuCfg.u.axiAddressWidth-1:0] ccu_address_t; + typedef logic [ccuCfg.u.axiDataWidth-1:0] ccu_data_t; + typedef logic [ccuCfg.u.axiDataWidth/8-1:0] 
ccu_strb_t; + typedef logic [ccuCfg.u.axiUserWidth-1:0] ccu_user_t; + + `ACE_TYPEDEF_AW_CHAN_T(ccu_ace_aw_t, ccu_address_t, ccu_id_t, ccu_user_t) + `AXI_TYPEDEF_B_CHAN_T(ccu_ace_b_t, ccu_id_t, ccu_user_t) + `ACE_TYPEDEF_AR_CHAN_T(ccu_ace_ar_t, ccu_address_t, ccu_id_t, ccu_user_t) + `ACE_TYPEDEF_R_CHAN_T(ccu_ace_r_t, ccu_data_t, ccu_id_t, ccu_user_t) + `ACE_TYPEDEF_REQ_T(ccu_ace_req_t, ccu_ace_aw_t, ccu_w_t, ccu_ace_ar_t) + `ACE_TYPEDEF_RESP_T(ccu_ace_resp_t, ccu_ace_b_t, ccu_ace_r_t) +// }}} + +localparam int unsigned scoreboardEntryIndexWidth = ccuCfg.transactionIndexWidth; + +logic scoreboard_full; +logic scoreboard_alloc_check; +logic scoreboard_alloc; +logic scoreboard_alloc_hit; +logic scoreboard_dealloc_check; +logic [ccuCfg.axiCcuIdWidth-1:0] scoreboard_dealloc_id; +logic scoreboard_dealloc_hit; +logic [scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_hit_entry; +logic [ccuCfg.u.numSubordinates] scoreboard_dealloc; +logic [ccuCfg.u.numSubordinates][scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_entry; + +logic replay_alloc; +logic replay_full; + +logic [ccuCfg.u.numSubordinates-1:0] snoop_ac_valid; +logic [ccuCfg.u.numSubordinates-1:0] snoop_ac_ready; +ccu_snoop_ac_t [ccuCfg.u.numSubordinates-1:0] snoop_ac; +logic [ccuCfg.u.numSubordinates-1:0] snoop_cr_valid; +logic [ccuCfg.u.numSubordinates-1:0] snoop_cr_ready; +ccu_snoop_cr_t [ccuCfg.u.numSubordinates-1:0] snoop_cr; +logic [ccuCfg.u.numSubordinates-1:0] snoop_cd_valid; +logic [ccuCfg.u.numSubordinates-1:0] snoop_cd_ready; +ccu_snoop_cd_t [ccuCfg.u.numSubordinates-1:0] snoop_cd; + +logic snoop_write_engine_aw_valid; +logic snoop_write_engine_aw_ready; +ccu_axi_manager_aw_t snoop_write_engine_aw; +logic snoop_write_engine_w_valid; +logic snoop_write_engine_w_ready; +ccu_w_t snoop_write_engine_w; +logic snoop_read_engine_ar_valid; +logic snoop_read_engine_ar_ready; +ccu_axi_manager_ar_t snoop_read_engine_ar; +logic snoop_read_engine_r_valid; +logic snoop_read_engine_r_ready; +ccu_ace_r_t 
snoop_read_engine_r; + + +logic read_engine_addr_check; +logic read_engine_addr_hit; +logic [ccuCfg.addressCheckWidth-1:0] read_engine_addr_slice; + +ccu_axi_manager_req_t manager_cut_req; +ccu_axi_manager_resp_t manager_cut_resp; + +// Frontend +// {{{ + // The frontend acts as the Point of Serialization (PoS) + ccu_ace_req_t frontend_req; + ccu_ace_resp_t frontend_resp; + logic shareable_stall; + + assign shareable_stall = 1'b0; + + ccu_frontend #( + .ccuCfg (ccuCfg), + .ccu_ace_manager_ar_t (ccu_ace_ar_t), + .ccu_ace_manager_aw_t (ccu_ace_aw_t), + .ccu_w_t (ccu_w_t), + .ccu_ace_manager_r_t (ccu_ace_r_t), + .ccu_ace_manager_b_t (ccu_ace_b_t), + .ccu_ace_manager_req_t (ccu_ace_req_t), + .ccu_ace_manager_resp_t (ccu_ace_resp_t), + .ccu_ace_subordinate_ar_t (ccu_ace_subordinate_ar_t), + .ccu_ace_subordinate_aw_t (ccu_ace_subordinate_aw_t), + .ccu_ace_subordinate_r_t (ccu_ace_subordinate_r_t), + .ccu_ace_subordinate_b_t (ccu_ace_subordinate_b_t), + .ccu_ace_subordinate_req_t (ccu_ace_subordinate_req_t), + .ccu_ace_subordinate_resp_t (ccu_ace_subordinate_resp_t) + ) u_ccu_frontend ( + .clk_i, + .rst_ni, + .shareable_stall_i (shareable_stall), + .subordinate_req_i (subordinate_req_i), + .subordinate_resp_o (subordinate_resp_o), + .subordinate_rack_i (subordinate_rack_i), + .subordinate_wack_i (subordinate_wack_i), + .manager_req_o (frontend_req), + .manager_resp_i (frontend_resp), + .scoreboard_dealloc_check_o (scoreboard_dealloc_check), + .scoreboard_dealloc_id_o (scoreboard_dealloc_id), + .scoreboard_dealloc_hit_i (scoreboard_dealloc_hit), + .scoreboard_dealloc_entry_i (scoreboard_dealloc_hit_entry), + .scoreboard_dealloc_o (scoreboard_dealloc), + .scoreboard_dealloc_entry_o (scoreboard_dealloc_entry) + ); +// }}} + +// AR-related snoop pipeline +// {{{ + ccu_snoop_pipeline #( + .ccuCfg (ccuCfg), + .ccu_ace_ar_t (ccu_ace_ar_t), + .ccu_ace_r_t (ccu_ace_r_t), + .ccu_snoop_ac_t (ccu_snoop_ac_t), + .ccu_snoop_cr_t (ccu_snoop_cr_t), + .ccu_snoop_cd_t (ccu_snoop_cd_t), + 
.ccu_w_t (ccu_w_t), + .ccu_axi_ar_t (ccu_axi_manager_ar_t), + .ccu_axi_aw_t (ccu_axi_manager_aw_t), + .domain_map_t (domain_map_t) + ) u_ccu_snoop_pipeline ( + .clk_i, + .rst_ni, + .domain_map_i (domain_map_i), + .ar_i (frontend_req.ar), + .ar_valid_i (frontend_req.ar_valid), + .ar_ready_o (frontend_resp.ar_ready), + .scoreboard_alloc_check_o (scoreboard_alloc_check), + .scoreboard_alloc_o (scoreboard_alloc), + .scoreboard_alloc_hit_i (scoreboard_alloc_hit), + .scoreboard_full_i (scoreboard_full), + .replay_alloc_o (replay_alloc), + .replay_full_i (replay_full), + .ac_valid_o (snoop_ac_valid), + .ac_ready_i (snoop_ac_ready), + .ac_o (snoop_ac), + .cr_valid_i (snoop_cr_valid), + .cr_ready_o (snoop_cr_ready), + .cr_i (snoop_cr), + .cd_valid_i (snoop_cd_valid), + .cd_ready_o (snoop_cd_ready), + .cd_i (snoop_cd), + .write_engine_aw_valid_o (snoop_write_engine_aw_valid), + .write_engine_aw_ready_i (snoop_write_engine_aw_ready), + .write_engine_aw_o (snoop_write_engine_aw), + .write_engine_w_valid_o (snoop_write_engine_w_valid), + .write_engine_w_ready_i (snoop_write_engine_w_ready), + .write_engine_w_o (snoop_write_engine_w), + .read_engine_ar_valid_o (snoop_read_engine_ar_valid), + .read_engine_ar_ready_i (snoop_read_engine_ar_ready), + .read_engine_ar_o (snoop_read_engine_ar), + .read_engine_r_valid_o (snoop_read_engine_r_valid), + .read_engine_r_ready_i (snoop_read_engine_r_ready), + .read_engine_r_o (snoop_read_engine_r) + ); + + for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_snoop_assignments + `SNOOP_ASSIGN_AC_STRUCT(snoop_req_o[s].ac, snoop_ac[s]) + assign snoop_req_o[s].ac_valid = snoop_ac_valid[s]; + assign snoop_ac_ready[s] = snoop_resp_i[s].ac_ready; + `SNOOP_ASSIGN_CR_STRUCT(snoop_cr[s], snoop_resp_i[s].cr) + assign snoop_cr_valid[s] = snoop_resp_i[s].cr_valid; + assign snoop_req_o[s].cr_ready = snoop_cr_ready[s]; + `SNOOP_ASSIGN_CD_STRUCT(snoop_cd[s], snoop_resp_i[s].cd) + assign snoop_cd_valid[s] = snoop_resp_i[s].cd_valid; + assign 
snoop_req_o[s].cd_ready = snoop_cd_ready[s]; + end +// }}} + +// Scoreboard +// {{{ + ccu_scoreboard #( + .ccuCfg (ccuCfg) + ) u_ccu_scoreboard ( + .clk_i, + .rst_ni, + .full_o (scoreboard_full), + .alloc_check_i (scoreboard_alloc_check), + .alloc_i (scoreboard_alloc), + .alloc_addr_i (frontend_req.ar.addr), + .alloc_id_i (frontend_req.ar.id), + .alloc_hit_o (scoreboard_alloc_hit), + .dealloc_check_i (scoreboard_dealloc_check), + .dealloc_id_i (scoreboard_dealloc_id), + .dealloc_hit_o (scoreboard_dealloc_hit), + .dealloc_hit_entry_o (scoreboard_dealloc_hit_entry), + .dealloc_i (scoreboard_dealloc), + .dealloc_entry_i (scoreboard_dealloc_entry) + ); +// }}} + +// Replay list +// TODO: currently a stub, to be implemented +// {{{ + ccu_replay #( + .ccuCfg (ccuCfg) + ) u_ccu_replay ( + .clk_i, + .rst_ni, + .replay_alloc_i (replay_alloc), + .replay_full_o (replay_full) + ); +// }}} + +// Write engine +// {{{ + /* + NOTE: AW/W/B pipelining happens in the frontend + by adding spill registers on these channels + */ + + // Intermediate signals to zero-pad the ID + // and drop the coherence fields + ccu_axi_manager_aw_t frontend_req_aw; + ccu_axi_manager_b_t frontend_resp_b; + + `AXI_ASSIGN_AW_STRUCT(frontend_req_aw, frontend_req.aw) + `AXI_ASSIGN_B_STRUCT(frontend_resp.b, frontend_resp_b) + + ccu_write_engine #( + .ccuCfg (ccuCfg), + .ccu_axi_aw_t (ccu_axi_manager_aw_t), + .ccu_w_t (ccu_w_t), + .ccu_axi_b_t (ccu_axi_manager_b_t) + ) u_ccu_write_engine ( + .clk_i, + .rst_ni, + .aw_valid_i (frontend_req.aw_valid), + .aw_ready_o (frontend_resp.aw_ready), + .aw_i (frontend_req_aw), + .w_valid_i (frontend_req.w_valid), + .w_ready_o (frontend_resp.w_ready), + .w_i (frontend_req.w), + .b_valid_o (frontend_resp.b_valid), + .b_ready_i (frontend_req.b_ready), + .b_o (frontend_resp_b), + .writeback_aw_valid_i (snoop_write_engine_aw_valid), + .writeback_aw_ready_o (snoop_write_engine_aw_ready), + .writeback_aw_i (snoop_write_engine_aw), + .writeback_w_valid_i 
(snoop_write_engine_w_valid), + .writeback_w_ready_o (snoop_write_engine_w_ready), + .writeback_w_i (snoop_write_engine_w), + .aw_valid_o (manager_cut_req.aw_valid), + .aw_ready_i (manager_cut_resp.aw_ready), + .aw_o (manager_cut_req.aw), + .w_valid_o (manager_cut_req.w_valid), + .w_ready_i (manager_cut_resp.w_ready), + .w_o (manager_cut_req.w), + .b_valid_i (manager_cut_resp.b_valid), + .b_ready_o (manager_cut_req.b_ready), + .b_i (manager_cut_resp.b), + .read_engine_addr_check_i (read_engine_addr_check), + .read_engine_addr_hit_o (read_engine_addr_hit), + .read_engine_addr_slice_i (read_engine_addr_slice) + ); +// }}} + +// Read engine +// {{{ + ccu_read_engine #( + .ccuCfg (ccuCfg), + .ccu_axi_ar_t (ccu_axi_manager_ar_t), + .ccu_ace_r_t (ccu_ace_r_t), + .ccu_axi_r_t (ccu_axi_manager_r_t) + ) u_ccu_read_engine ( + .clk_i, + .rst_ni, + .ar_valid_i (snoop_read_engine_ar_valid), + .ar_ready_o (snoop_read_engine_ar_ready), + .ar_i (snoop_read_engine_ar), + .ar_addr_check_o (read_engine_addr_check), + .ar_addr_hit_i (read_engine_addr_hit), + .ar_addr_slice_o (read_engine_addr_slice), + .snoop_pipeline_r_valid_i (snoop_read_engine_r_valid), + .snoop_pipeline_r_ready_o (snoop_read_engine_r_ready), + .snoop_pipeline_r_i (snoop_read_engine_r), + .r_valid_o (frontend_resp.r_valid), + .r_ready_i (frontend_req.r_ready), + .r_o (frontend_resp.r), + .ar_valid_o (manager_cut_req.ar_valid), + .ar_ready_i (manager_cut_resp.ar_ready), + .ar_o (manager_cut_req.ar), + .r_valid_i (manager_cut_resp.r_valid), + .r_ready_o (manager_cut_req.r_ready), + .r_i (manager_cut_resp.r) + ); +// }}} + +// AXI cut +// {{{ + axi_cut #( + .Bypass (1'b0), + .aw_chan_t (ccu_axi_manager_aw_t), + .w_chan_t (ccu_w_t), + .b_chan_t (ccu_axi_manager_b_t), + .ar_chan_t (ccu_axi_manager_ar_t), + .r_chan_t (ccu_axi_manager_r_t), + .axi_req_t (ccu_axi_manager_req_t), + .axi_resp_t (ccu_axi_manager_resp_t) + ) u_ccu_axi_manager_cut ( + .clk_i, + .rst_ni, + .slv_req_i (manager_cut_req), + .slv_resp_o 
(manager_cut_resp),
+    .mst_req_o  (manager_req_o),
+    .mst_resp_i (manager_resp_i)
+  );
+// }}}
+endmodule
diff --git a/src/ccu/ccu_write_engine.sv b/src/ccu/ccu_write_engine.sv
new file mode 100644
index 0000000..4f30b28
--- /dev/null
+++ b/src/ccu/ccu_write_engine.sv
@@ -0,0 +1,210 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+`include "axi/assign.svh"
+// Write engine: merges frontend writes and snoop writebacks onto one AXI write port and tracks inflight write addresses.
+module ccu_write_engine
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter ccu_config_t ccuCfg       = '{default: '0},
+  parameter type         ccu_axi_aw_t = logic,
+  parameter type         ccu_w_t      = logic,
+  parameter type         ccu_axi_b_t  = logic
+) (
+  input  logic        clk_i,
+  input  logic        rst_ni,
+
+  input  logic        aw_valid_i,            // frontend write request (AW/W in, B out)
+  output logic        aw_ready_o,
+  input  ccu_axi_aw_t aw_i,
+  input  logic        w_valid_i,
+  output logic        w_ready_o,
+  input  ccu_w_t      w_i,
+  output logic        b_valid_o,
+  input  logic        b_ready_i,
+  output ccu_axi_b_t  b_o,
+
+  input  logic        writeback_aw_valid_i,  // writeback request (AW/W only, its B is dropped internally)
+  output logic        writeback_aw_ready_o,
+  input  ccu_axi_aw_t writeback_aw_i,
+  input  logic        writeback_w_valid_i,
+  output logic        writeback_w_ready_o,
+  input  ccu_w_t      writeback_w_i,
+
+  output logic        aw_valid_o,            // merged AXI write port
+  input  logic        aw_ready_i,
+  output ccu_axi_aw_t aw_o,
+  output logic        w_valid_o,
+  input  logic        w_ready_i,
+  output ccu_w_t      w_o,
+  input  logic        b_valid_i,
+  output logic        b_ready_o,
+  input  ccu_axi_b_t  b_i,
+
+  input  logic                                read_engine_addr_check_i,  // lookup request against inflight writes
+  output logic                                read_engine_addr_hit_o,    // lookup result (u_write_inflight_map.exists_o)
+  input  logic [ccuCfg.addressCheckWidth-1:0] read_engine_addr_slice_i   // address slice to look up
+);
+
+// Inflight addresses associative map: holds the address slice of each issued AW until its B handshake
+// {{{
+  logic [ccuCfg.addressCheckWidth-1:0] write_inflight_map_wdata;
+  logic                                write_inflight_map_push;
+  logic                                write_inflight_map_pop;
+  logic                                write_inflight_map_full;
+
+  assign write_inflight_map_wdata = aw_o.addr[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb];
+  assign write_inflight_map_push  = aw_valid_o && aw_ready_i;  // AW handshake on the merged port
+  assign write_inflight_map_pop   = b_valid_i && b_ready_o;    // B handshake on the merged port
+
+  id_queue #(
+    .ID_WIDTH            (ccuCfg.axiManagerIdWidth),
+    .CAPACITY            (ccuCfg.u.numWriteTransactions),
+    .FULL_BW             (1'b1),
+    .CUT_OUP_POP_INP_GNT (1'b1),
+    .NUM_CMP_PORTS       (1),
+    .data_t              (logic [ccuCfg.addressCheckWidth-1:0])
+  ) u_write_inflight_map (
+    .clk_i,
+    .rst_ni,
+    .inp_id_i         (aw_o.id),
+    .inp_data_i       (write_inflight_map_wdata),
+    .inp_req_i        (write_inflight_map_push),
+    .inp_gnt_o        (),  // unused: u_aw_fork is gated with full_o instead
+    .exists_data_i    (read_engine_addr_slice_i),
+    .exists_mask_i    ('1),
+    .exists_req_i     (read_engine_addr_check_i),
+    .exists_o         (read_engine_addr_hit_o),
+    .exists_gnt_o     (),
+    .oup_id_i         (b_i.id),
+    .oup_pop_i        (1'b1),
+    .oup_req_i        (write_inflight_map_pop),
+    .oup_data_o       (),
+    .oup_data_valid_o (),
+    .oup_gnt_o        (),
+    .full_o           (write_inflight_map_full),
+    .empty_o          ()
+  );
+// }}}
+
+// AW channel: arbitrate frontend/writeback AWs, then fork the winner to the AXI port and the W control FIFO
+// {{{
+  logic aw_valid, aw_ready;  // arbitrated AW handshake between u_aw_arbiter and u_aw_fork
+  logic w_ctrl_fifo_valid_in;
+  logic w_ctrl_fifo_ready_in;
+  logic aw_is_writeback;
+
+  rr_arb_tree #(
+    .NumIn     (2),
+    .DataType  (ccu_axi_aw_t),
+    .ExtPrio   (1'b0),
+    .AxiVldRdy (1'b1),
+    .LockIn    (1'b1),
+    .FairArb   (1'b1)
+  ) u_aw_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .rr_i    ('0),
+    .req_i   ({writeback_aw_valid_i, aw_valid_i}),  // index 1: writeback, index 0: frontend
+    .gnt_o   ({writeback_aw_ready_o, aw_ready_o}),
+    .data_i  ({writeback_aw_i      , aw_i      }),
+    .req_o   (aw_valid),
+    .gnt_i   (aw_ready),
+    .data_o  (aw_o),
+    .idx_o   (aw_is_writeback)                      // granted index is used as the writeback flag
+  );
+
+  stream_fork_dynamic #(
+    .N_OUP(2)
+  ) u_aw_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i     (aw_valid),
+    .ready_o     (aw_ready),
+    .sel_i       ('1),                        // both outputs selected
+    .sel_valid_i (!write_inflight_map_full),  // intended to hold AW while the inflight map is full
+    .sel_ready_o (),
+    .valid_o     ({aw_valid_o, w_ctrl_fifo_valid_in}),
+    .ready_i     ({aw_ready_i, w_ctrl_fifo_ready_in})
+  );
+// }}}
+
+// W muxing: the control FIFO replays the AW grant order to pick the W source of each burst
+// {{{
+  logic w_ctrl_fifo_valid_out;
+  logic w_ctrl_fifo_ready_out;
+  logic w_is_writeback;
+  logic w_mux_valid_out;
+  logic w_mux_ready_out;
+
+  stream_fifo #(
+    .FALL_THROUGH(1'b1),
+    .DATA_WIDTH  (1),
+    .DEPTH       (2)
+  ) u_w_ctrl_fifo (
+    .clk_i     (clk_i),
+    .rst_ni    (rst_ni),
+    .flush_i   (1'b0),
+    .testmode_i(1'b0),
+    .usage_o   (),
+    .data_i    (aw_is_writeback),
+    .valid_i   (w_ctrl_fifo_valid_in),
+    .ready_o   (w_ctrl_fifo_ready_in),
+    .data_o    (w_is_writeback),
+    .valid_o   (w_ctrl_fifo_valid_out),
+    .ready_i   (w_ctrl_fifo_ready_out && w_o.last)  // pop the selector only with the last W beat
+  );
+
+  stream_mux #(
+    .DATA_T(ccu_w_t),
+    .N_INP (2)
+  ) u_w_mux (
+    .inp_data_i ({writeback_w_i      , w_i}),
+    .inp_valid_i({writeback_w_valid_i, w_valid_i}),
+    .inp_ready_o({writeback_w_ready_o, w_ready_o}),
+    .inp_sel_i  (w_is_writeback),
+    .oup_data_o (w_o),
+    .oup_valid_o(w_mux_valid_out),
+    .oup_ready_i(w_mux_ready_out)
+  );
+
+  stream_join #(
+    .N_INP(2)
+  ) u_w_join (
+    .inp_valid_i({w_ctrl_fifo_valid_out, w_mux_valid_out}),
+    .inp_ready_o({w_ctrl_fifo_ready_out, w_mux_ready_out}),
+    .oup_valid_o(w_valid_o),
+    .oup_ready_i(w_ready_i)
+  );
+// }}}
+
+// B channel filtering: responses flagged as writeback are dropped, the rest go to the frontend
+// {{{
+  logic b_is_writeback;
+
+  // The additional ID bit is used to uniquely identify
+  // writeback operations
+  // TODO: this might be overkill?
+  assign b_is_writeback = b_i.id[ccuCfg.axiCcuIdWidth];
+
+  stream_filter u_b_filter (
+    .valid_i(b_valid_i),
+    .ready_o(b_ready_o),
+    .drop_i (b_is_writeback),
+    .valid_o(b_valid_o),
+    .ready_i(b_ready_i)
+  );
+
+  `AXI_ASSIGN_B_STRUCT(b_o, b_i)
+// }}}
+endmodule
diff --git a/src/ccu/deprecated/ccu_ctrl.sv b/src/ccu/deprecated/ccu_ctrl.sv
deleted file mode 100644
index b0e1ff2..0000000
--- a/src/ccu/deprecated/ccu_ctrl.sv
+++ /dev/null
@@ -1,573 +0,0 @@
-// Copyright 2022 ETH Zurich and University of Bologna.
-// Solderpad Hardware License, Version 0.51, see LICENSE for details. -// SPDX-License-Identifier: SHL-0.51 - -`include "ace/assign.svh" -`include "ace/typedef.svh" - -module ccu_ctrl import ccu_ctrl_pkg::*; import axi_pkg::*; -#( - parameter int unsigned DcacheLineWidth = 0, - parameter int unsigned DcacheIndexWidth = 0, - parameter int unsigned AxiDataWidth = 0, - parameter int unsigned AxiAddrWidth = 0, - parameter int unsigned NoMstPorts = 4, - parameter int unsigned SlvAxiIDWidth = 0, - parameter bit CollisionOnSetOnly = 0, - parameter type mst_aw_chan_t = logic, - parameter type w_chan_t = logic, - parameter type mst_b_chan_t = logic, - parameter type mst_ar_chan_t = logic, - parameter type mst_r_chan_t = logic, - parameter type mst_req_t = logic, - parameter type mst_resp_t = logic, - parameter type slv_aw_chan_t = logic, - parameter type slv_b_chan_t = logic, - parameter type slv_ar_chan_t = logic, - parameter type slv_r_chan_t = logic, - parameter type slv_req_t = logic, - parameter type slv_resp_t = logic, - parameter type snoop_ac_t = logic, - parameter type snoop_cr_t = logic, - parameter type snoop_cd_t = logic, - parameter type snoop_req_t = logic, - parameter type snoop_resp_t = logic -) ( - //clock and reset - input clk_i, - input rst_ni, - // CCU Request In and response out - input slv_req_t ccu_req_i, - output slv_resp_t ccu_resp_o, - //CCU Request Out and response in - output mst_req_t ccu_req_o, - input mst_resp_t ccu_resp_i, - // Snoop channel resuest and response - output snoop_req_t [NoMstPorts-1:0] s2m_req_o, - input snoop_resp_t [NoMstPorts-1:0] m2s_resp_i, - // Perf counters - output logic [7:0] perf_evt_o -); - -logic [7:0] perf_evt; - -localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth; -localparam int unsigned DCacheByteOffset = $clog2(DcacheLineWidth/8); -localparam int unsigned MstIdxBits = $clog2(NoMstPorts); - -localparam int unsigned IdQueueDataWidth = CollisionOnSetOnly ? 
- DcacheIndexWidth : - AxiAddrWidth - DCacheByteOffset; - -typedef logic [IdQueueDataWidth-1:0] id_queue_data_t; - -logic [SlvAxiIDWidth:0] b_inp_id; -id_queue_data_t b_inp_data; -logic b_inp_req; -logic b_inp_gnt; - -id_queue_data_t b_exists_data; -id_queue_data_t b_exists_mask; -logic b_exists_req; -logic b_exists; -logic b_exists_gnt; - -logic [SlvAxiIDWidth:0] b_oup_id; -logic b_oup_pop; -logic b_oup_req; -id_queue_data_t b_oup_data; -logic b_oup_data_valid; -logic b_oup_gnt; - -logic [SlvAxiIDWidth:0] r_inp_id; -id_queue_data_t r_inp_data; -logic r_inp_req; -logic r_inp_gnt; - -id_queue_data_t r_exists_data; -id_queue_data_t r_exists_mask; -logic r_exists_req; -logic r_exists; -logic r_exists_gnt; - -logic [SlvAxiIDWidth:0] r_oup_id; -logic r_oup_pop; -logic r_oup_req; -id_queue_data_t r_oup_data; -logic r_oup_data_valid; -logic r_oup_gnt; - - -slv_resp_t mu_ccu_resp; -slv_req_t mu_ccu_req; - -su_op_e su_op; -mu_op_e mu_op; - -logic su_req, mu_req; - -logic su_gnt, mu_gnt; - -slv_req_t dec_ccu_req_holder; - -logic dec_shared, dec_dirty; - -logic [MstIdxBits-1:0] dec_first_responder, cd_first_responder_in, cd_first_responder_out; - -snoop_cd_t [NoMstPorts-1:0] cd; -snoop_cd_t cd_first_responder; -logic cd_handshake, mu_cd_handshake, su_cd_handshake; -logic [NoMstPorts-1:0] cd_valid; -logic [NoMstPorts-1:0] cd_ready; -logic [NoMstPorts-1:0] cd_data_available_in, cd_data_available_out; -logic [NoMstPorts-1:0] cd_last_q; -logic cd_fifo_full, mu_cd_fifo_full, su_cd_fifo_full; - -slv_r_chan_t su_r; -logic su_r_valid, su_r_ready; - -logic ccu_ar_ready, ccu_aw_ready; - -snoop_req_t [NoMstPorts-1:0] dec_snoop_req; - -logic dec_lookup_req; -logic [AxiAddrWidth-1:0] dec_lookup_addr; - -slv_aw_chan_t b_queue_aw; -slv_ar_chan_t r_queue_ar; - -logic b_queue_push, r_queue_push; - -logic dec_cd_fifo_stall; - -ccu_ctrl_decoder #( - .DcacheLineWidth (DcacheLineWidth), - .AxiDataWidth (AxiDataWidth), - .AxiAddrWidth (AxiAddrWidth), - .NoMstPorts (NoMstPorts), - .SlvAxiIDWidth 
(SlvAxiIDWidth), - .slv_aw_chan_t (slv_aw_chan_t), - .w_chan_t (w_chan_t), - .slv_b_chan_t (slv_b_chan_t), - .slv_ar_chan_t (slv_ar_chan_t), - .slv_r_chan_t (slv_r_chan_t), - .slv_req_t (slv_req_t), - .slv_resp_t (slv_resp_t), - .snoop_ac_t (snoop_ac_t), - .snoop_cr_t (snoop_cr_t), - .snoop_cd_t (snoop_cd_t), - .snoop_req_t (snoop_req_t), - .snoop_resp_t (snoop_resp_t) -) ccu_ctrl_decoder_i ( - .clk_i, - .rst_ni, - - .ccu_req_i, - - .s2m_req_o (dec_snoop_req), - .m2s_resp_i, - - .slv_aw_ready_o (ccu_aw_ready), - .slv_ar_ready_o (ccu_ar_ready), - - .ccu_req_holder_o (dec_ccu_req_holder), - .su_gnt_i (su_gnt), - .mu_gnt_i (mu_gnt), - .su_req_o (su_req), - .mu_req_o (mu_req), - .su_op_o (su_op), - .mu_op_o (mu_op), - .shared_o (dec_shared), - .dirty_o (dec_dirty), - .data_available_o (cd_data_available_in), - .first_responder_o (dec_first_responder), - - .lookup_req_o (dec_lookup_req), - .lookup_addr_o (dec_lookup_addr), - .cd_fifo_stall_i (dec_cd_fifo_stall), - - .b_queue_full_i (~b_inp_gnt), - .r_queue_full_i (~r_inp_gnt), - .b_collision_i (b_exists), - .r_collision_i (r_exists), - .b_queue_push_o (b_queue_push), - .r_queue_push_o (r_queue_push), - .b_queue_aw_o (b_queue_aw), - .r_queue_ar_o (r_queue_ar), - - .perf_evt_o () - -); - -ccu_ctrl_snoop_unit #( - .DcacheLineWidth (DcacheLineWidth), - .AxiDataWidth (AxiDataWidth), - .NoMstPorts (NoMstPorts), - .SlvAxiIDWidth (SlvAxiIDWidth), - .slv_aw_chan_t (slv_aw_chan_t), - .w_chan_t (w_chan_t), - .slv_b_chan_t (slv_b_chan_t), - .slv_ar_chan_t (slv_ar_chan_t), - .slv_r_chan_t (slv_r_chan_t), - .slv_req_t (slv_req_t), - .slv_resp_t (slv_resp_t), - .snoop_ac_t (snoop_ac_t), - .snoop_cr_t (snoop_cr_t), - .snoop_cd_t (snoop_cd_t), - .snoop_req_t (snoop_req_t), - .snoop_resp_t (snoop_resp_t) -) ccu_ctrl_snoop_unit_i ( - .clk_i, - .rst_ni, - - .r_o (su_r), - .r_valid_o (su_r_valid), - .r_ready_i (su_r_ready), - - .cd_i (cd_first_responder), - .cd_handshake_i (su_cd_handshake), - .cd_fifo_full_o (su_cd_fifo_full), - - 
.ccu_req_holder_i (dec_ccu_req_holder), - - .su_gnt_o (su_gnt), - .su_req_i (su_req), - .su_op_i (su_op), - - .shared_i (dec_shared), - .dirty_i (dec_dirty) -); - -ccu_ctrl_memory_unit #( - .DcacheLineWidth (DcacheLineWidth), - .AxiDataWidth (AxiDataWidth), - .NoMstPorts (NoMstPorts), - .SlvAxiIDWidth (SlvAxiIDWidth), - .mst_aw_chan_t (mst_aw_chan_t), - .w_chan_t (w_chan_t), - .mst_b_chan_t (mst_b_chan_t), - .mst_ar_chan_t (mst_ar_chan_t), - .mst_r_chan_t (mst_r_chan_t), - .mst_req_t (mst_req_t), - .mst_resp_t (mst_resp_t), - .slv_aw_chan_t (slv_aw_chan_t), - .slv_b_chan_t (slv_b_chan_t), - .slv_ar_chan_t (slv_ar_chan_t), - .slv_r_chan_t (slv_r_chan_t), - .slv_req_t (slv_req_t), - .slv_resp_t (slv_resp_t), - .snoop_ac_t (snoop_ac_t), - .snoop_cr_t (snoop_cr_t), - .snoop_cd_t (snoop_cd_t), - .snoop_req_t (snoop_req_t), - .snoop_resp_t (snoop_resp_t) -) ccu_ctrl_memory_unit_i ( - .clk_i, - .rst_ni, - - .ccu_req_i (mu_ccu_req), - .ccu_resp_o (mu_ccu_resp), - - .ccu_req_o, - .ccu_resp_i, - - .cd_i (cd_first_responder), - .cd_handshake_i (mu_cd_handshake), - .cd_fifo_full_o (mu_cd_fifo_full), - - .ccu_req_holder_i (dec_ccu_req_holder), - .mu_gnt_o (mu_gnt), - .mu_req_i (mu_req), - .mu_op_i (mu_op), - .first_responder_i (dec_first_responder), - - .perf_evt_o (perf_evt) -); - -/////////////////// -// R arbitration // -/////////////////// - -logic [1:0] r_valid_in, r_ready_in; -slv_r_chan_t [1:0] r_chans_in; - -slv_r_chan_t r_chan_out; -logic r_valid_out, r_ready_out; - -always_comb begin - mu_ccu_req = ccu_req_i; - - r_valid_in = {mu_ccu_resp.r_valid, su_r_valid}; - r_chans_in = {mu_ccu_resp.r, su_r}; - {mu_ccu_req.r_ready, su_r_ready} = r_ready_in; -end - -rr_arb_tree #( - .NumIn ( 2 ), - .DataType ( slv_r_chan_t ), - .AxiVldRdy( 1'b1 ), - .LockIn ( 1'b1 ) -) r_arbiter_i ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .flush_i( 1'b0 ), - .rr_i ( '0 ), - .req_i ( r_valid_in ), - .gnt_o ( r_ready_in ), - .data_i ( r_chans_in ), - .gnt_i ( r_ready_out ), - .req_o ( 
r_valid_out ), - .data_o ( r_chan_out ), - .idx_o ( ) -); - -always_comb begin - // Resp - ccu_resp_o = mu_ccu_resp; - - ccu_resp_o.r = r_chan_out; - ccu_resp_o.r_valid = r_valid_out; - r_ready_out = ccu_req_i.r_ready; - - ccu_resp_o.ar_ready = ccu_ar_ready; - ccu_resp_o.aw_ready = ccu_aw_ready; -end - -// Snoop AC and CR -for (genvar i = 0; i < NoMstPorts; i++) begin - assign s2m_req_o[i].ac = dec_snoop_req[i].ac; - assign s2m_req_o[i].ac_valid = dec_snoop_req[i].ac_valid; - assign s2m_req_o[i].cr_ready = dec_snoop_req[i].cr_ready; -end - -///////////////////// -// Collision Check // -///////////////////// - -logic [AxiAddrWidth-1:0] b_inp_aligned_addr; -logic [AxiAddrWidth-1:0] b_exists_aligned_addr; -logic [AxiAddrWidth-1:0] r_inp_aligned_addr; -logic [AxiAddrWidth-1:0] r_exists_aligned_addr; - -assign b_inp_aligned_addr = axi_pkg::aligned_addr(b_queue_aw.addr,b_queue_aw.size); -assign b_exists_aligned_addr = dec_lookup_addr; - -assign r_inp_aligned_addr = axi_pkg::aligned_addr(r_queue_ar.addr,r_queue_ar.size); -assign r_exists_aligned_addr = dec_lookup_addr; - -// Exists - -// _gnt is not used as it is combinationally set when req = 1 - -assign b_exists_data = b_exists_aligned_addr[DCacheByteOffset+:IdQueueDataWidth]; -assign b_exists_mask = '1; -assign b_exists_req = dec_lookup_req; - -assign r_exists_data = r_exists_aligned_addr[DCacheByteOffset+:IdQueueDataWidth]; -assign r_exists_mask = '1; -assign r_exists_req = dec_lookup_req; - -// Oup -assign b_oup_id = ccu_resp_o.b.id; -assign b_oup_pop = 1'b1; -assign b_oup_req = ccu_resp_o.b_valid && ccu_req_i.b_ready; - -assign r_oup_id = ccu_resp_o.r.id; -assign r_oup_pop = 1'b1; -assign r_oup_req = ccu_resp_o.r_valid && ccu_req_i.r_ready && ccu_resp_o.r.last; - -// _data_* not used -// _gnt is not used as it is combinationally set when req = 1 - -// Inp -assign b_inp_id = b_queue_aw.id; -assign b_inp_data = b_inp_aligned_addr[DCacheByteOffset+:IdQueueDataWidth]; -assign b_inp_req = b_queue_push; - -assign r_inp_id 
= r_queue_ar.id; -assign r_inp_data = r_inp_aligned_addr[DCacheByteOffset+:IdQueueDataWidth]; -assign r_inp_req = r_queue_push; - -id_queue #( - .ID_WIDTH (SlvAxiIDWidth+1), - .CAPACITY (6), - .FULL_BW (1), - .CUT_OUP_POP_INP_GNT (1), - .data_t (id_queue_data_t) -) b_id_queue ( - .clk_i, - .rst_ni, - - .inp_id_i (b_inp_id), - .inp_data_i (b_inp_data), - .inp_req_i (b_inp_req), - .inp_gnt_o (b_inp_gnt), - - .exists_data_i (b_exists_data), - .exists_mask_i (b_exists_mask), - .exists_req_i (b_exists_req), - .exists_o (b_exists), - .exists_gnt_o (b_exists_gnt), - - .oup_id_i (b_oup_id), - .oup_pop_i (b_oup_pop), - .oup_req_i (b_oup_req), - .oup_data_o (b_oup_data), - .oup_data_valid_o (b_oup_data_valid), - .oup_gnt_o (b_oup_gnt) -); - -id_queue #( - .ID_WIDTH (SlvAxiIDWidth+1), - .CAPACITY (6), - .FULL_BW (1), - .CUT_OUP_POP_INP_GNT (1), - .data_t (id_queue_data_t) -) r_id_queue ( - .clk_i, - .rst_ni, - - .inp_id_i (r_inp_id), - .inp_data_i (r_inp_data), - .inp_req_i (r_inp_req), - .inp_gnt_o (r_inp_gnt), - - .exists_data_i (r_exists_data), - .exists_mask_i (r_exists_mask), - .exists_req_i (r_exists_req), - .exists_o (r_exists), - .exists_gnt_o (r_exists_gnt), - - .oup_id_i (r_oup_id), - .oup_pop_i (r_oup_pop), - .oup_req_i (r_oup_req), - .oup_data_o (r_oup_data), - .oup_data_valid_o (r_oup_data_valid), - .oup_gnt_o (r_oup_gnt) -); - -//////////////////// -// CD arbitration // -//////////////////// - -logic mu_wb_op, su_wb_op; - -logic cd_user_pop, cd_user_push, cd_user_empty, cd_user_full; - -typedef enum logic { MEMORY_UNIT, SNOOP_UNIT } cd_user_t; - -cd_user_t cd_user_in, cd_user_out; - -logic cd_done; - -assign mu_wb_op = mu_op inside {SEND_AXI_REQ_WRITE_BACK_R, SEND_AXI_REQ_WRITE_BACK_W}; -assign su_wb_op = su_op == READ_SNP_DATA; - -assign dec_cd_fifo_stall = cd_user_full; - -logic cd_user_pushed_d, cd_user_pushed_q; - -always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - cd_user_pushed_q <= '0; - end else begin - cd_user_pushed_q <= 
cd_user_pushed_d; - end -end - -always_comb begin - cd_user_pushed_d = cd_user_pushed_q; - cd_user_push = 1'b0; - cd_user_in = MEMORY_UNIT; - if (mu_req && mu_wb_op) begin - cd_user_pushed_d = !mu_gnt; - cd_user_push = !cd_user_pushed_q; - cd_user_in = MEMORY_UNIT; - end else if (su_req && su_wb_op) begin - cd_user_pushed_d = !su_gnt; - cd_user_push = !cd_user_pushed_q; - cd_user_in = SNOOP_UNIT; - end -end - -always_comb begin - su_cd_handshake = '0; - mu_cd_handshake = '0; - cd_fifo_full = '0; - cd_done = '0; - - if (!cd_user_empty) begin - cd_done = cd_last_q == cd_data_available_out; - case (cd_user_out) - MEMORY_UNIT: begin - mu_cd_handshake = cd_handshake; - cd_fifo_full = mu_cd_fifo_full; - end - SNOOP_UNIT: begin - su_cd_handshake = cd_handshake; - cd_fifo_full = su_cd_fifo_full; - end - endcase - end -end - -for (genvar i = 0; i < NoMstPorts; i++) begin - assign cd_ready[i] = (cd_first_responder_out == i && cd_fifo_full) ? '0 : - !cd_user_empty && !cd_last_q[i] && cd_data_available_out[i]; -end - -for (genvar i = 0; i < NoMstPorts; i++) begin - assign cd[i] = m2s_resp_i[i].cd; - assign cd_valid[i] = m2s_resp_i[i].cd_valid; - assign s2m_req_o[i].cd_ready = cd_ready[i]; -end - -logic cd_user_out_temp, cd_user_in_temp; -assign cd_user_in_temp = logic'(cd_user_in); -assign cd_user_out = cd_user_t'(cd_user_out_temp); -assign cd_first_responder_in = dec_first_responder; - -assign cd_user_pop = cd_done; - -fifo_v3 #( - .FALL_THROUGH(1), - .DATA_WIDTH(1 + NoMstPorts + MstIdxBits), - .DEPTH(4) -) cd_ordering_fifo_i ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i (1'b0), - .testmode_i (1'b0), - .full_o (cd_user_full), - .empty_o (cd_user_empty), - .usage_o (), - .data_i ({cd_user_in_temp, cd_first_responder_in, cd_data_available_in}), - .push_i (cd_user_push), - .data_o ({cd_user_out_temp, cd_first_responder_out, cd_data_available_out}), - .pop_i (cd_user_pop) -); - -for (genvar i = 0; i < NoMstPorts; i = i + 1) begin - always_ff @ (posedge clk_i, negedge rst_ni) 
begin - if(!rst_ni) begin - cd_last_q[i] <= '0; - end else if(cd_done) begin - cd_last_q[i] <= '0; - end else if(cd_valid[i]) begin - cd_last_q[i] <= (cd[i].last & cd_data_available_out[i]); - end - end -end - -assign cd_first_responder = cd[cd_first_responder_out]; -assign cd_handshake = cd_valid[cd_first_responder_out] && cd_ready[cd_first_responder_out]; - -always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - perf_evt_o <= '0; - end else begin - perf_evt_o <= perf_evt; - end -end - -endmodule diff --git a/src/ccu/deprecated/ccu_ctrl_decoder.sv b/src/ccu/deprecated/ccu_ctrl_decoder.sv deleted file mode 100644 index 05fad39..0000000 --- a/src/ccu/deprecated/ccu_ctrl_decoder.sv +++ /dev/null @@ -1,603 +0,0 @@ -module ccu_ctrl_decoder import ccu_ctrl_pkg::*; -#( - parameter int unsigned DcacheLineWidth = 0, - parameter int unsigned AxiDataWidth = 0, - parameter int unsigned AxiAddrWidth = 0, - parameter int unsigned NoMstPorts = 4, - parameter int unsigned SlvAxiIDWidth = 0, - parameter bit PerfCounters = 0, - parameter type slv_aw_chan_t = logic, - parameter type w_chan_t = logic, - parameter type slv_b_chan_t = logic, - parameter type slv_ar_chan_t = logic, - parameter type slv_r_chan_t = logic, - parameter type slv_req_t = logic, - parameter type slv_resp_t = logic, - parameter type snoop_ac_t = logic, - parameter type snoop_cr_t = logic, - parameter type snoop_cd_t = logic, - parameter type snoop_req_t = logic, - parameter type snoop_resp_t = logic, - localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth, - localparam int unsigned MstIdxBits = $clog2(NoMstPorts) -) ( - //clock and reset - input clk_i, - input rst_ni, - // CCU Request in - input slv_req_t ccu_req_i, - // Snoop channel resuest and response - output snoop_req_t [NoMstPorts-1:0] s2m_req_o, - input snoop_resp_t [NoMstPorts-1:0] m2s_resp_i, - - output logic slv_aw_ready_o, - output logic slv_ar_ready_o, - - output slv_req_t ccu_req_holder_o, - - output logic 
su_req_o, - input logic su_gnt_i, - output logic mu_req_o, - input logic mu_gnt_i, - - output mu_op_e mu_op_o, - output su_op_e su_op_o, - output logic shared_o, - output logic dirty_o, - output logic [NoMstPorts-1:0] data_available_o, - output logic [MstIdxBits-1:0] first_responder_o, - - output logic lookup_req_o, - output logic [AxiAddrWidth-1:0] lookup_addr_o, - output logic b_queue_push_o, - output slv_aw_chan_t b_queue_aw_o, - input logic b_queue_full_i, - output logic r_queue_push_o, - output slv_ar_chan_t r_queue_ar_o, - input logic r_queue_full_i, - input logic b_collision_i, - input logic r_collision_i, - - input logic cd_fifo_stall_i, - - output logic [7:0] perf_evt_o -); - - logic [NoMstPorts-1:0] ac_handshake_q, ac_handshake_d, ac_handshake; - - logic [NoMstPorts-1:0] cr_aw_initiator, cr_ar_initiator; - logic [NoMstPorts-1:0] cr_handshake_q, cr_handshake_d, cr_handshake; - - typedef enum logic [1:0] { INVALID_W, INVALID_R, RESP_R } cr_cmd_fifo_t; - - logic generic_stall; - logic ac_ctrl_ready; - logic cr_done; - - // AW FIFO - logic aw_fifo_empty, aw_fifo_full; - logic aw_fifo_pop, aw_fifo_push; - slv_aw_chan_t aw_fifo_in, aw_fifo_out; - - // AR FIFO - logic ar_fifo_empty, ar_fifo_full; - logic ar_fifo_pop, ar_fifo_push; - slv_ar_chan_t ar_fifo_in, ar_fifo_out; - - // CR CMD FIFO - logic cr_cmd_fifo_empty, cr_cmd_fifo_full; - logic cr_cmd_fifo_pop, cr_cmd_fifo_push; - cr_cmd_fifo_t cr_cmd_fifo_in, cr_cmd_fifo_out; - - logic ac_busy_q, ac_busy_d; - - // Hold incoming ACE request - - slv_aw_chan_t aw_holder; - logic aw_holder_valid, aw_holder_ready; - slv_ar_chan_t ar_holder; - logic ar_holder_valid, ar_holder_ready; - snoop_ac_t aw_ac, ar_ac; - logic [NoMstPorts-1:0] aw_initiator, ar_initiator; - - assign b_queue_push_o = aw_holder_ready && aw_holder_valid; - assign r_queue_push_o = ar_holder_ready && ar_holder_valid; - - assign b_queue_aw_o = aw_holder; - assign r_queue_ar_o = ar_holder; - - assign aw_initiator = 1 << 
aw_holder.id[SlvAxiIDWidth+:MstIdxBits]; - assign ar_initiator = 1 << ar_holder.id[SlvAxiIDWidth+:MstIdxBits]; - - - logic send_invalid_r; - logic collision; - - assign send_invalid_r = ar_holder.snoop == snoop_pkg::CleanUnique || ar_holder.lock; - assign collision = b_collision_i || r_collision_i; - - always_comb begin - aw_ac = '0; - aw_ac.addr = aw_holder.addr; - aw_ac.prot = aw_holder.prot; - aw_ac.snoop = snoop_pkg::CleanInvalid; - - ar_ac = '0; - ar_ac.addr = ar_holder.addr; - ar_ac.prot = ar_holder.prot; - ar_ac.snoop = send_invalid_r ? snoop_pkg::CleanInvalid : ar_holder.snoop; - end - - spill_register #( - .T (slv_aw_chan_t), - .Bypass (1'b1) - ) aw_spill_register ( - .clk_i, - .rst_ni, - .valid_i (ccu_req_i.aw_valid), - .ready_o (slv_aw_ready_o), - .data_i (ccu_req_i.aw), - .valid_o (aw_holder_valid), - .ready_i (aw_holder_ready), - .data_o (aw_holder) - ); - - spill_register #( - .T (slv_ar_chan_t), - .Bypass (1'b1) - ) ar_spill_register ( - .clk_i, - .rst_ni, - .valid_i (ccu_req_i.ar_valid), - .ready_o (slv_ar_ready_o), - .data_i (ccu_req_i.ar), - .valid_o (ar_holder_valid), - .ready_i (ar_holder_ready), - .data_o (ar_holder) - ); - - logic [1:0] arb_req_in, arb_gnt_in; - logic arb_req_out, arb_gnt_out; - snoop_ac_t arb_ac_out; - logic arb_idx_out; - - assign arb_req_in = {aw_holder_valid, ar_holder_valid}; - assign {aw_holder_ready, ar_holder_ready} = arb_gnt_in; - - rr_arb_tree #( - .NumIn ( 2 ), - .DataType ( snoop_ac_t ), - .AxiVldRdy( 1'b1 ), - .LockIn ( 1'b1 ), - .ExtPrio ( 1'b0 ) - ) arbiter_i ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .flush_i( 1'b0 ), - .rr_i ( '0 ), - .req_i ( arb_req_in ), - .gnt_o ( arb_gnt_in ), - .data_i ( {aw_ac, ar_ac} ), - .req_o ( arb_req_out ), - .gnt_i ( arb_gnt_out ), - .data_o ( arb_ac_out ), - .idx_o ( arb_idx_out ) - ); - - assign generic_stall = |{ - // CR CMD FIFO full - cr_cmd_fifo_full, - // CD CMD FIFO full - cd_fifo_stall_i, - // AR requests, ID queue or FIFO full - arb_idx_out == 0 && (r_queue_full_i || 
ar_fifo_full), - // AW requests, ID queue or FIFO full - arb_idx_out == 1 && (b_queue_full_i || aw_fifo_full) - }; - assign arb_gnt_out = !generic_stall && !collision && ac_ctrl_ready; - assign lookup_req_o = arb_req_out; - assign lookup_addr_o = arb_idx_out == 1 ? - axi_pkg::aligned_addr(aw_holder.addr,aw_holder.size): - axi_pkg::aligned_addr(ar_holder.addr,ar_holder.size); - - - for (genvar i = 0; i < NoMstPorts; i = i + 1) begin - assign ac_handshake[i] = m2s_resp_i[i].ac_ready & s2m_req_o[i].ac_valid; - assign cr_handshake[i] = m2s_resp_i[i].cr_valid & s2m_req_o[i].cr_ready; - end - - snoop_ac_t ac_out; - logic [NoMstPorts-1:0] ac_out_valid; - logic [NoMstPorts-1:0] cr_out_ready; - - // Hold snoop AC handshakes - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - ac_handshake_q <= '0; - end else begin - ac_handshake_q <= ac_handshake_d; - end - end - - // Hold snoop CR handshakes - logic [NoMstPorts-1:0] data_available_q, response_error_q, shared_q, dirty_q; - logic [NoMstPorts-1:0] data_available_d, response_error_d, shared_d, dirty_d; - always_ff @ (posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - cr_handshake_q <= '0; - data_available_q <= '0; - shared_q <= '0; - dirty_q <= '0; - response_error_q <= '0; - end else if(cr_done) begin - cr_handshake_q <= '0; - data_available_q <= '0; - shared_q <= '0; - dirty_q <= '0; - response_error_q <= '0; - end else begin - cr_handshake_q <= cr_handshake_d; - data_available_q <= data_available_d; - shared_q <= shared_d; - dirty_q <= dirty_d; - response_error_q <= response_error_d; - end - end - - for (genvar i = 0; i < NoMstPorts; i = i + 1) begin - assign cr_handshake_d[i] = cr_handshake[i] ? 1'b1 : cr_handshake_q[i]; - assign data_available_d[i] = cr_handshake[i] ? m2s_resp_i[i].cr_resp.dataTransfer : data_available_q[i]; - assign shared_d[i] = cr_handshake[i] ? m2s_resp_i[i].cr_resp.isShared : shared_q[i]; - assign dirty_d[i] = cr_handshake[i] ? 
m2s_resp_i[i].cr_resp.passDirty : dirty_q[i]; - assign response_error_d[i] = cr_handshake[i] ? m2s_resp_i[i].cr_resp.error : response_error_q[i]; - end - - assign dirty_o = |dirty_d; - assign shared_o = |shared_d; - assign data_available_o = data_available_d; - - logic [MstIdxBits-1:0] first_responder_q, first_responder_d; - logic snoop_resp_found_q, snoop_resp_found_d; - - always_ff @ (posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - first_responder_q <= '0; - snoop_resp_found_q <= 1'b0; - end else if(cr_done) begin - first_responder_q <= '0; - snoop_resp_found_q <= 1'b0; - end else if (!snoop_resp_found_q) begin - first_responder_q <= first_responder_d; - snoop_resp_found_q <= snoop_resp_found_d; - end - end - - always_comb begin - first_responder_d = first_responder_q; - snoop_resp_found_d = snoop_resp_found_q; - for (int i = 0; i < NoMstPorts; i = i + 1) begin - if(cr_handshake[i] & m2s_resp_i[i].cr_resp.dataTransfer & !m2s_resp_i[i].cr_resp.error) begin - first_responder_d = i[MstIdxBits-1:0]; - snoop_resp_found_d = 1'b1; - break; - end - end - end - - assign first_responder_o = first_responder_d; - - snoop_ac_t ac_q, ac_d; - - logic mu_done_d, mu_done_q; - logic su_done_d, su_done_q; - - // ---------------------- - // Current State Block - // ---------------------- - always_ff @(posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - ac_busy_q <= '0; - ac_q <= '0; - mu_done_q <= '0; - su_done_q <= '0; - end else begin - ac_busy_q <= ac_busy_d; - ac_q <= ac_d; - mu_done_q <= mu_done_d; - su_done_q <= su_done_d; - end - end - - // ---------------------- - // Current State Block - // ---------------------- - - always_comb begin - - ac_ctrl_ready = 1'b0; - - ac_d = ac_q; - ac_out_valid = '0; - ac_out = ac_q; - - // Next state - ac_busy_d = ac_busy_q; - ac_handshake_d = ac_handshake_q; - - cr_cmd_fifo_in = RESP_R; - aw_fifo_push = 1'b0; - ar_fifo_push = 1'b0; - - case (ac_busy_q) - 1'b0: begin - ac_ctrl_ready = 1'b1; - ac_out = arb_ac_out; - 
ac_handshake_d = '0; - if (arb_req_out && !generic_stall && !collision) begin - ac_d = arb_ac_out; - if (arb_idx_out == 1) begin - aw_fifo_push = 1'b1; - cr_cmd_fifo_in = INVALID_W; - ac_handshake_d = ac_handshake | aw_initiator; - ac_out_valid = ~aw_initiator; - ac_busy_d = (ac_handshake | aw_initiator) != '1; - end else if (arb_idx_out == 0) begin - ar_fifo_push = 1'b1; - cr_cmd_fifo_in = send_invalid_r ? INVALID_R : RESP_R; - ac_handshake_d = ac_handshake | ar_initiator; - ac_out_valid = ~ar_initiator; - ac_busy_d = (ac_handshake | ar_initiator) != '1; - end - end - end - 1'b1: begin - ac_out_valid = ~ac_handshake_q; - ac_handshake_d = ac_handshake | ac_handshake_q; - ac_out = ac_q; - if ((ac_handshake | ac_handshake_q) == '1) begin - ac_ctrl_ready = 1'b1; - if (arb_req_out && !generic_stall && !collision) begin - ac_d = arb_ac_out; - ac_busy_d = 1'b1; - if (arb_idx_out == 1) begin - aw_fifo_push = 1'b1; - cr_cmd_fifo_in = INVALID_W; - ac_handshake_d = aw_initiator; - end else if (arb_idx_out == 0) begin - ar_fifo_push = 1'b1; - cr_cmd_fifo_in = send_invalid_r ? 
INVALID_R : RESP_R; - ac_handshake_d = ar_initiator; - end - end else begin - ac_busy_d = 1'b0; - ac_handshake_d = '0; - end - end - end - endcase - end - - assign cr_aw_initiator = 1 << aw_fifo_out.id[SlvAxiIDWidth+:MstIdxBits]; - assign cr_ar_initiator = 1 << ar_fifo_out.id[SlvAxiIDWidth+:MstIdxBits]; - - always_comb begin - - mu_done_d = mu_done_q; - su_done_d = su_done_q; - - su_req_o = 1'b0; - mu_req_o = 1'b0; - su_op_o = READ_SNP_DATA; - mu_op_o = SEND_AXI_REQ_R; - - aw_fifo_pop = '0; - ar_fifo_pop = '0; - - cr_out_ready = '0; - - cr_done = 1'b0; - - if (!cr_cmd_fifo_empty) begin - case (cr_cmd_fifo_out) - RESP_R: begin - // wait for all CR handshakes - if (cr_handshake_d == ~cr_ar_initiator) begin - - if(|(data_available_d & ~response_error_d)) begin - su_op_o = READ_SNP_DATA; - su_req_o = 1'b1; - if (su_gnt_i) begin - ar_fifo_pop = 1'b1; - cr_done = 1'b1; - end - end else begin - mu_op_o = SEND_AXI_REQ_R; - mu_req_o = 1'b1; - if (mu_gnt_i) begin - ar_fifo_pop = 1'b1; - cr_done = 1'b1; - end - end - end - - cr_out_ready = ~(cr_handshake_q | cr_ar_initiator); - end - - INVALID_R: begin - // TODO: sending the ack R transaction could be moved from - // the snoop unit directly here - // wait for all CR handshakes - if (cr_handshake_d == ~cr_ar_initiator) begin - - su_req_o = !ar_fifo_out.lock && !su_done_q; - su_done_d = su_gnt_i || su_done_q; - - if(|(data_available_d & ~response_error_d)) begin - mu_op_o = SEND_AXI_REQ_WRITE_BACK_R; - mu_req_o = !mu_done_q; - cr_done = ar_fifo_out.lock ? 
mu_gnt_i : - &({mu_gnt_i, su_gnt_i} | {mu_done_q, su_done_q}); - end else if (ar_fifo_out.lock) begin - mu_op_o = SEND_AXI_REQ_R; - mu_req_o = !mu_done_q; - cr_done = mu_gnt_i; - end else begin - cr_done = su_gnt_i; - end - - mu_done_d = mu_gnt_i || mu_done_q; - - if (cr_done) begin - ar_fifo_pop = 1'b1; - mu_done_d = 1'b0; - su_done_d = 1'b0; - end - end - - su_op_o = SEND_INVALID_ACK_R; - - cr_out_ready = ~(cr_handshake_q | cr_ar_initiator); - end - - INVALID_W: begin - // wait for all CR handshakes - if (cr_handshake_d == ~cr_aw_initiator) begin - - mu_req_o = 1'b1; - - if (mu_gnt_i) begin - aw_fifo_pop = 1'b1; - cr_done = 1'b1; - end - - if(|(data_available_d & ~response_error_d)) begin - mu_op_o = SEND_AXI_REQ_WRITE_BACK_W; - end else begin - mu_op_o = SEND_AXI_REQ_W; - end - end - - cr_out_ready = ~(cr_handshake_q | cr_aw_initiator); - end - endcase - end - end - - always_comb begin - s2m_req_o = '0; - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - s2m_req_o[n].ac = ac_out; - s2m_req_o[n].ac_valid = ac_out_valid[n]; - s2m_req_o[n].cr_ready = cr_out_ready[n]; - end - end - - assign cr_cmd_fifo_push = aw_fifo_push || ar_fifo_push; - assign cr_cmd_fifo_pop = aw_fifo_pop || ar_fifo_pop; - - fifo_v3 #( - .FALL_THROUGH(0), - .DEPTH(4), - .dtype (cr_cmd_fifo_t) - ) cr_cmd_fifo_i ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i (1'b0), - .testmode_i (1'b0), - .full_o (cr_cmd_fifo_full), - .empty_o (cr_cmd_fifo_empty), - .usage_o (), - .data_i (cr_cmd_fifo_in), - .push_i (cr_cmd_fifo_push), - .data_o (cr_cmd_fifo_out), - .pop_i (cr_cmd_fifo_pop) - ); - - assign ar_fifo_in = ar_holder; - assign ccu_req_holder_o.ar = ar_fifo_out; - - fifo_v3 #( - .FALL_THROUGH(0), - .DEPTH(4), - .dtype (slv_ar_chan_t) - ) ar_fifo_i ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i (1'b0), - .testmode_i (1'b0), - .full_o (ar_fifo_full), - .empty_o (ar_fifo_empty), - .usage_o (), - .data_i (ar_fifo_in), - .push_i (ar_fifo_push), - .data_o (ar_fifo_out), - .pop_i 
(ar_fifo_pop) - ); - - assign aw_fifo_in = aw_holder; - assign ccu_req_holder_o.aw = aw_fifo_out; - - fifo_v3 #( - .FALL_THROUGH(0), - .DEPTH(4), - .dtype (slv_aw_chan_t) - ) aw_fifo_i ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i (1'b0), - .testmode_i (1'b0), - .full_o (aw_fifo_full), - .empty_o (aw_fifo_empty), - .usage_o (), - .data_i (aw_fifo_in), - .push_i (aw_fifo_push), - .data_o (aw_fifo_out), - .pop_i (aw_fifo_pop) - ); - - if (PerfCounters) begin : gen_perf_events - logic perf_snoop_hit; - logic perf_snoop_miss; - logic perf_writeback; - logic perf_collision_cycles; - logic perf_collision_req; - logic perf_generic_stall; - logic perf_ac_busy_stall; - logic perf_mu_stall; - - logic collision_req_observed_q, collision_req_observed_d; - - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - collision_req_observed_q <= '0; - end else begin - collision_req_observed_q <= collision_req_observed_d; - end - end - - // Perf counters - assign perf_snoop_hit = su_req_o && su_gnt_i && cr_cmd_fifo_out == RESP_R && su_op_o == READ_SNP_DATA; - assign perf_snoop_miss = mu_req_o && mu_gnt_i && cr_cmd_fifo_out == RESP_R && mu_op_o == SEND_AXI_REQ_R; - assign perf_writeback = mu_req_o && mu_gnt_i && mu_op_o inside {SEND_AXI_REQ_WRITE_BACK_W, SEND_AXI_REQ_WRITE_BACK_R}; - assign perf_collision_cycles = !ac_busy_q && arb_req_out && !generic_stall && collision; - assign perf_collision_req = perf_collision_cycles && !collision_req_observed_q; - assign perf_generic_stall = !ac_busy_q && arb_req_out && generic_stall; - assign perf_ac_busy_stall = arb_req_out && !ac_ctrl_ready; - assign perf_mu_stall = mu_req_o && !mu_gnt_i; - - assign perf_evt_o = { - perf_snoop_hit, - perf_snoop_miss, - perf_writeback, - perf_collision_cycles, - perf_collision_req, - perf_generic_stall, - perf_ac_busy_stall, - perf_mu_stall - }; - - assign collision_req_observed_d = perf_collision_cycles; - end else begin - assign perf_evt_o = '0; - end -endmodule \ No newline at end of 
file diff --git a/src/ccu/deprecated/ccu_ctrl_memory_unit.sv b/src/ccu/deprecated/ccu_ctrl_memory_unit.sv deleted file mode 100644 index 6dc8607..0000000 --- a/src/ccu/deprecated/ccu_ctrl_memory_unit.sv +++ /dev/null @@ -1,386 +0,0 @@ -module ccu_ctrl_memory_unit import ccu_ctrl_pkg::*; -#( - parameter int unsigned DcacheLineWidth = 0, - parameter int unsigned AxiDataWidth = 0, - parameter int unsigned NoMstPorts = 4, - parameter int unsigned SlvAxiIDWidth = 0, - parameter bit PerfCounters = 0, - parameter type mst_aw_chan_t = logic, - parameter type w_chan_t = logic, - parameter type mst_b_chan_t = logic, - parameter type mst_ar_chan_t = logic, - parameter type mst_r_chan_t = logic, - parameter type mst_req_t = logic, - parameter type mst_resp_t = logic, - parameter type slv_aw_chan_t = logic, - parameter type slv_b_chan_t = logic, - parameter type slv_ar_chan_t = logic, - parameter type slv_r_chan_t = logic, - parameter type slv_req_t = logic, - parameter type slv_resp_t = logic, - parameter type snoop_ac_t = logic, - parameter type snoop_cr_t = logic, - parameter type snoop_cd_t = logic, - parameter type snoop_req_t = logic, - parameter type snoop_resp_t = logic, - localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth, - localparam int unsigned MstIdxBits = $clog2(NoMstPorts) -) ( - //clock and reset - input clk_i, - input rst_ni, - // CCU Request In and response out - input slv_req_t ccu_req_i, - output slv_resp_t ccu_resp_o, - //CCU Request Out and response in - output mst_req_t ccu_req_o, - input mst_resp_t ccu_resp_i, - - input snoop_cd_t cd_i, - input logic cd_handshake_i, - output logic cd_fifo_full_o, - - - input slv_req_t ccu_req_holder_i, - output logic mu_gnt_o, - input logic mu_req_i, - input mu_op_e mu_op_i, - input logic [MstIdxBits-1:0] first_responder_i, - - output logic [7:0] perf_evt_o -); - -localparam CD_FIFO_DEPTH = 4; -localparam AXI_FIFO_DEPTH = 0; // Passthrough -localparam W_FIFO_DEPTH = 2; - -mst_req_t ccu_req_out; 
-mst_resp_t ccu_resp_in; - -slv_req_t ccu_req_holder_q, ccu_req_holder_d; -logic [MstIdxBits-1:0] first_responder_q, first_responder_d; - -logic cd_fifo_pop, cd_fifo_empty; -logic [AxiDataWidth-1:0] cd_fifo_data_out; - -always_ff @(posedge clk_i , negedge rst_ni) begin - if(!rst_ni) begin - ccu_req_holder_q <= '0; - first_responder_q <= '0; - end else if (mu_gnt_o && mu_req_i) begin - ccu_req_holder_q <= ccu_req_holder_d; - first_responder_q <= first_responder_d; - end -end - -logic ax_busy_q, ax_busy_d; -mu_op_e ax_op_q, ax_op_d; - -always_ff @(posedge clk_i , negedge rst_ni) begin - if(!rst_ni) begin - ax_busy_q <= 1'b0; - ax_op_q <= SEND_AXI_REQ_R; - end else begin - ax_busy_q <= ax_busy_d; - ax_op_q <= ax_op_d; - end -end - -mst_ar_chan_t ar_out; -mst_aw_chan_t aw_out; - -logic ar_valid_out, aw_valid_out; - -logic w_last_d, w_last_q; - -typedef enum logic {W_PASSTHROUGH, W_FROM_FIFO} w_state_t; - -logic w_fifo_full, w_fifo_empty; -logic w_fifo_push, w_fifo_pop; -w_state_t w_fifo_data_in, w_fifo_data_out; - -assign first_responder_d = !ax_busy_q ? first_responder_i : first_responder_q; -assign ccu_req_holder_d = !ax_busy_q ? ccu_req_holder_i : ccu_req_holder_q; -assign mu_gnt_o = !ax_busy_q ? mu_req_i : 1'b0; - -always_comb begin - ax_busy_d = ax_busy_q; - ax_op_d = ax_busy_q ? 
ax_op_q : mu_op_i; - - ar_out = '0; - aw_out = '0; - ar_valid_out = 1'b0; - aw_valid_out = 1'b0; - - w_fifo_push = 1'b0; - w_fifo_data_in = W_PASSTHROUGH; - - if (mu_req_i || ax_busy_q) begin - ax_busy_d = 1'b1; - case (ax_op_d) - SEND_AXI_REQ_R: begin - ar_valid_out = 'b1; - ar_out = ccu_req_holder_d.ar; - if (ccu_resp_in.ar_ready) begin - ax_busy_d = 1'b0; - end - end - SEND_AXI_REQ_WRITE_BACK_R: begin - // send writeback request - aw_valid_out = !w_fifo_full; - aw_out = '0; //default - aw_out.addr = ccu_req_holder_d.ar.addr; - aw_out.addr[3:0] = 4'b0; // writeback is always full cache line - aw_out.size = 2'b11; - aw_out.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction - aw_out.id = {1'b1, first_responder_d, ccu_req_holder_d.ar.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. for AMO operations - aw_out.len = DcacheLineWords-1; - // WRITEBACK - aw_out.domain = 2'b00; - aw_out.snoop = 3'b011; - - w_fifo_data_in = W_FROM_FIFO; - - if (ccu_resp_in.aw_ready && !w_fifo_full) begin - w_fifo_push = 1'b1; - if (ccu_req_holder_d.ar.lock) begin - // Blocking behavior for AMO operations - // TODO: check if truly needed - ax_op_d = AMO_WAIT_WB_R; - end else begin - ax_busy_d = 1'b0; - end - end - end - SEND_AXI_REQ_W: begin - aw_valid_out = !w_fifo_full; - aw_out = ccu_req_holder_d.aw; - - w_fifo_data_in = W_PASSTHROUGH; - - if (ccu_resp_in.aw_ready && !w_fifo_full) begin - w_fifo_push = 1'b1; - ax_busy_d = 1'b0; - end - end - SEND_AXI_REQ_WRITE_BACK_W: begin - // send writeback request - aw_valid_out = !w_fifo_full; - aw_out = '0; //default - aw_out.addr = ccu_req_holder_d.aw.addr; - aw_out.addr[3:0] = 4'b0; // writeback is always full cache line - aw_out.size = 2'b11; - aw_out.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction - aw_out.id = {1'b1, first_responder_d, ccu_req_holder_d.aw.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the 
responder, important e.g. for AMO operations - aw_out.len = DcacheLineWords-1; - // WRITEBACK - aw_out.domain = 2'b00; - aw_out.snoop = 3'b011; - - w_fifo_data_in = W_FROM_FIFO; - - if (ccu_resp_in.aw_ready && !w_fifo_full) begin - w_fifo_push = 1'b1; - ax_busy_d = 1'b1; - if (ccu_req_holder_d.aw.atop[5]) - ax_op_d = AMO_WAIT_WB_W; - else - ax_op_d = SEND_AXI_REQ_W; - end - end - AMO_WAIT_WB_R: begin - if(ccu_resp_in.b_valid && ccu_req_out.b_ready - && ccu_resp_in.b.id == {1'b1, first_responder_q, ccu_req_holder_q.ar.id[SlvAxiIDWidth-1:0]}) - ax_op_d = SEND_AXI_REQ_R; - end - AMO_WAIT_WB_W: begin - if(ccu_resp_in.b_valid && ccu_req_out.b_ready && - ccu_resp_in.b.id == {1'b1, first_responder_q, ccu_req_holder_q.aw.id[SlvAxiIDWidth-1:0]}) - ax_op_d = SEND_AXI_REQ_W; - end - endcase - end -end - - -assign cd_fifo_pop = w_fifo_data_out == W_FROM_FIFO && - ccu_resp_in.w_ready && ccu_req_out.w_valid; - -fifo_v3 #( - .FALL_THROUGH(1), - .DATA_WIDTH(AxiDataWidth), - .DEPTH(CD_FIFO_DEPTH) - ) cd_memory_fifo_i ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i (1'b0), - .testmode_i (1'b0), - .full_o (cd_fifo_full_o), - .empty_o (cd_fifo_empty), - .usage_o (), - .data_i (cd_i.data), - .push_i (cd_handshake_i), - .data_o (cd_fifo_data_out), - .pop_i (cd_fifo_pop) -); - -// AR -assign ccu_req_out.ar = ar_out; -assign ccu_req_out.ar_valid = ar_valid_out; - -// AW -assign ccu_req_out.aw = aw_out; -assign ccu_req_out.aw_valid = aw_valid_out; - -// R passthrough -assign ccu_resp_o.r = ccu_resp_in.r; -assign ccu_resp_o.r_valid = ccu_resp_in.r_valid; -assign ccu_req_out.r_ready = ccu_req_i.r_ready; - -// W and B - -always_ff @(posedge clk_i or negedge rst_ni) begin - if(!rst_ni) begin - w_last_q <= 1'b0; - end else begin - w_last_q <= w_last_d; - end -end - -fifo_v3 #( - .FALL_THROUGH(1), - .DEPTH(W_FIFO_DEPTH), - .dtype(w_state_t) - ) w_fifo_i ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i (1'b0), - .testmode_i (1'b0), - .full_o (w_fifo_full), - .empty_o (w_fifo_empty), - 
.usage_o (), - .data_i (w_fifo_data_in), - .push_i (w_fifo_push), - .data_o (w_fifo_data_out), - .pop_i (w_fifo_pop) -); - - - -always_comb begin - ccu_req_out.w = ccu_req_i.w; - ccu_req_out.w_valid = 1'b0; - ccu_resp_o.w_ready = 1'b0; - - w_fifo_pop = 1'b0; - - w_last_d = w_last_q; - - if (!w_fifo_empty) begin - case (w_fifo_data_out) - W_PASSTHROUGH: begin - ccu_req_out.w_valid = ccu_req_i.w_valid; - ccu_resp_o.w_ready = ccu_resp_in.w_ready; - - if(ccu_resp_in.w_ready && ccu_req_i.w_valid && ccu_req_i.w.last) - w_fifo_pop = 1'b1; - end - W_FROM_FIFO: begin - // Connect the FIFO as long as the transmission is ongoing - w_last_d = (ccu_resp_in.w_ready && !cd_fifo_empty) || w_last_q; - ccu_req_out.w_valid = !cd_fifo_empty; - ccu_req_out.w.strb = '1; - ccu_req_out.w.data = cd_fifo_data_out; - ccu_req_out.w.last = w_last_q; - - if(ccu_resp_in.w_ready && !cd_fifo_empty && w_last_q) begin - w_last_d = 1'b0; - w_fifo_pop = 1'b1; - end - end - endcase - end -end - -assign ccu_resp_o.b = ccu_resp_in.b; - -// An additional bit in the ID is used to verify whether the CCU -// issued the request or simply forwarded one from the core -logic is_wb_resp; -assign is_wb_resp = (ccu_resp_in.b.id[SlvAxiIDWidth+$clog2(NoMstPorts)] == 1'b1); - -always_comb begin - ccu_req_out.b_ready = 1'b0; - ccu_resp_o.b_valid = 1'b0; - - if (is_wb_resp) begin - // Response to a WB issued by the CCU - ccu_req_out.b_ready = 'b1; - end else begin - // Response to a core request - ccu_req_out.b_ready = ccu_req_i.b_ready; - ccu_resp_o.b_valid = ccu_resp_in.b_valid; - end -end - - -axi_fifo #( - .Depth (AXI_FIFO_DEPTH), - .aw_chan_t (mst_aw_chan_t), - .w_chan_t (w_chan_t), - .b_chan_t (mst_b_chan_t), - .ar_chan_t (mst_ar_chan_t), - .r_chan_t (mst_r_chan_t), - .axi_req_t (mst_req_t), - .axi_resp_t(mst_resp_t) -) fifo_to_from_mem_i ( - .clk_i, - .rst_ni, - .test_i (1'b0), - // slave port - .slv_req_i (ccu_req_out), - .slv_resp_o (ccu_resp_in), - // master port - .mst_req_o (ccu_req_o), - .mst_resp_i 
(ccu_resp_i) -); - -if (PerfCounters) begin : gen_perf_events - - logic perf_send_axi_req_r; - logic perf_send_axi_req_write_back_r; - logic perf_send_axi_req_w; - logic perf_send_axi_req_write_back_w; - logic perf_cd_fifo_full; - logic perf_amo_wait_wb_r; - logic perf_amo_wait_wb_w; - logic perf_w_fifo_full; - - logic ungranted_request; - assign ungranted_request = mu_req_i && !mu_gnt_o; - - assign perf_send_axi_req_r = ungranted_request && ax_op_q == SEND_AXI_REQ_R; - assign perf_send_axi_req_write_back_r = ungranted_request && ax_op_q == SEND_AXI_REQ_WRITE_BACK_R; - assign perf_send_axi_req_w = ungranted_request && ax_op_q == SEND_AXI_REQ_W; - assign perf_send_axi_req_write_back_w = ungranted_request && ax_op_q == SEND_AXI_REQ_WRITE_BACK_W; - assign perf_amo_wait_wb_r = ungranted_request && ax_op_q == AMO_WAIT_WB_R; - assign perf_amo_wait_wb_w = ungranted_request && ax_op_q == AMO_WAIT_WB_W; - assign perf_cd_fifo_full = cd_fifo_full_o; - assign perf_w_fifo_full = w_fifo_full; - - assign perf_evt_o = { - perf_send_axi_req_r, - perf_send_axi_req_write_back_r, - perf_send_axi_req_w, - perf_send_axi_req_write_back_w, - perf_amo_wait_wb_r, - perf_amo_wait_wb_w, - perf_cd_fifo_full, - perf_w_fifo_full - }; -end else begin - assign perf_evt_o = '0; -end - - -endmodule \ No newline at end of file diff --git a/src/ccu/deprecated/ccu_ctrl_pkg.sv b/src/ccu/deprecated/ccu_ctrl_pkg.sv deleted file mode 100644 index 4061a39..0000000 --- a/src/ccu/deprecated/ccu_ctrl_pkg.sv +++ /dev/null @@ -1,19 +0,0 @@ -package ccu_ctrl_pkg; - - typedef enum logic [3:0] { - SEND_AXI_REQ_R, - SEND_AXI_REQ_WRITE_BACK_R, - SEND_AXI_REQ_W, - SEND_AXI_REQ_WRITE_BACK_W, - AMO_WAIT_WB_R, - AMO_WAIT_WB_W - } mu_op_e; - - typedef enum logic { - READ_SNP_DATA, - SEND_INVALID_ACK_R - } su_op_e; - - typedef enum logic { MEMORY_UNIT, SNOOP_UNIT } cd_user_t; - -endpackage \ No newline at end of file diff --git a/src/ccu/deprecated/ccu_ctrl_snoop_unit.sv b/src/ccu/deprecated/ccu_ctrl_snoop_unit.sv deleted 
file mode 100644 index 3ca24dd..0000000 --- a/src/ccu/deprecated/ccu_ctrl_snoop_unit.sv +++ /dev/null @@ -1,185 +0,0 @@ -module ccu_ctrl_snoop_unit import ccu_ctrl_pkg::*; -#( - parameter int unsigned DcacheLineWidth = 0, - parameter int unsigned AxiDataWidth = 0, - parameter int unsigned NoMstPorts = 4, - parameter int unsigned SlvAxiIDWidth = 0, - parameter type slv_aw_chan_t = logic, - parameter type w_chan_t = logic, - parameter type slv_b_chan_t = logic, - parameter type slv_ar_chan_t = logic, - parameter type slv_r_chan_t = logic, - parameter type slv_req_t = logic, - parameter type slv_resp_t = logic, - parameter type snoop_ac_t = logic, - parameter type snoop_cr_t = logic, - parameter type snoop_cd_t = logic, - parameter type snoop_req_t = logic, - parameter type snoop_resp_t = logic, - localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth, - localparam int unsigned MstIdxBits = $clog2(NoMstPorts) -) ( - //clock and reset - input clk_i, - input rst_ni, - // CCU Request In and response out - output slv_r_chan_t r_o, - output logic r_valid_o, - input logic r_ready_i, - - input snoop_cd_t cd_i, - input logic cd_handshake_i, - output logic cd_fifo_full_o, - - input slv_req_t ccu_req_holder_i, - output logic su_gnt_o, - input logic su_req_i, - input su_op_e su_op_i, - input logic shared_i, - input logic dirty_i -); - -localparam FIFO_DEPTH = 2; - -logic [AxiDataWidth-1:0] fifo_data_in, fifo_data_out; -logic [$clog2(DcacheLineWords)-1:0] fifo_usage; - -logic su_busy_d, su_busy_q; -logic r_last_d, r_last_q; -su_op_e su_op_d, su_op_q; - -slv_req_t ccu_req_holder_q, ccu_req_holder_d; -logic shared_q, shared_d; -logic dirty_q, dirty_d; - -always_ff @(posedge clk_i , negedge rst_ni) begin - if(!rst_ni) begin - ccu_req_holder_q <= '0; - shared_q <= '0; - dirty_q <= '0; - end else begin - ccu_req_holder_q <= ccu_req_holder_d; - shared_q <= shared_d; - dirty_q <= dirty_d; - end -end - -always_ff @(posedge clk_i , negedge rst_ni) begin - if(!rst_ni) 
begin - su_busy_q <= '0; - su_op_q <= READ_SNP_DATA; - r_last_q <= '0; - end else begin - su_busy_q <= su_busy_d; - su_op_q <= su_op_d; - r_last_q <= r_last_d; - end -end - -logic ar_addr_offset; - -assign ar_addr_offset = ccu_req_holder_i.ar.addr[3]; - -logic fifo_full, fifo_empty, fifo_push, fifo_pop; - -assign cd_fifo_full_o = fifo_full; - -assign ccu_req_holder_d = su_busy_q ? ccu_req_holder_q : ccu_req_holder_i; -assign shared_d = su_busy_q ? shared_q : shared_i; -assign dirty_d = su_busy_q ? dirty_q : dirty_i; -assign su_op_d = su_busy_q ? su_op_q : su_op_i; - -always_comb begin - su_gnt_o = 1'b0; - - r_o = '0; - r_valid_o = 1'b0; - - fifo_pop = 1'b0; - - su_busy_d = su_busy_q; - r_last_d = r_last_q; - - if (su_req_i || su_busy_q) begin - su_gnt_o = !su_busy_q; - su_busy_d = 1'b1; - case (su_op_d) - READ_SNP_DATA: begin - // Prepare request - r_o.data = fifo_data_out; - r_o.id = ccu_req_holder_d.ar.id; - r_o.resp[3] = shared_d; // update if shared - r_o.resp[2] = dirty_d; // update if any line dirty - r_o.last = r_last_q; // No further transactions - - if (r_last_q) begin - r_valid_o = !fifo_empty; - if (r_ready_i && !fifo_empty) begin - fifo_pop = 1'b1; - su_busy_d = 1'b0; - r_last_d = 1'b0; - end - end else begin - // Single data request - if (ccu_req_holder_d.ar.len == 0) begin - // The lower 64 bits are required - if (!ar_addr_offset) begin - r_o.last = 1'b1; - r_valid_o = !fifo_empty; // There is something to send - if (r_ready_i && !fifo_empty) begin - fifo_pop = 1'b1; - su_busy_d = 1'b0; - end - end else begin - // The lower 64 bits are not needed - // Consume them and move the upper 64 bits - r_last_d = 1'b1; - fifo_pop = 1'b1; - end - end else begin - // Full cacheline request - r_valid_o = !fifo_empty; // There is something to send - if (r_ready_i && !fifo_empty) begin - fifo_pop = 1'b1; - r_last_d = 1'b1; - end - end - end - end - SEND_INVALID_ACK_R: begin - r_o = '0; - r_o.id = ccu_req_holder_d.ar.id; - r_o.last = 'b1; - r_valid_o = 'b1; - if 
(r_ready_i) begin - su_busy_d = 1'b0; - end - end - endcase - end -end - -assign fifo_push = cd_handshake_i; -assign fifo_flush = !(su_req_i || su_busy_q); -assign fifo_data_in = cd_i.data; - - - fifo_v3 #( - .FALL_THROUGH(1), - .DATA_WIDTH(AxiDataWidth), - .DEPTH(FIFO_DEPTH) - ) cd_snoop_fifo_i ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i (fifo_flush), - .testmode_i (1'b0), - .full_o (fifo_full), - .empty_o (fifo_empty), - .usage_o (fifo_usage), - .data_i (fifo_data_in), - .push_i (fifo_push), - .data_o (fifo_data_out), - .pop_i (fifo_pop) -); - -endmodule diff --git a/src/deprecated/ace_sim_master.sv b/src/deprecated/ace_sim_master.sv deleted file mode 100644 index 16f7409..0000000 --- a/src/deprecated/ace_sim_master.sv +++ /dev/null @@ -1,1543 +0,0 @@ -package ace_sim_master; - -import axi_test::*; - -typedef enum logic [3:0] { - AR_READ_NO_SNOOP, - AR_READ_ONCE, - AR_READ_SHARED, - AR_READ_CLEAN, - AR_READ_NOT_SHARED_DIRTY, - AR_READ_UNIQUE, - AR_CLEAN_UNIQUE, - AR_MAKE_UNIQUE, - AR_CLEAN_SHARED, - AR_CLEAN_INVALID, - AR_MAKE_INVALID, - AR_BARRIER, - AR_DVM_COMPLETE, - AR_DVM_MESSAGE -} ar_snoop_e; - -ar_snoop_e ar_unsupported_ops[] = '{AR_READ_NO_SNOOP, AR_BARRIER, AR_DVM_COMPLETE, AR_DVM_MESSAGE}; - -typedef enum logic [2:0] { - AW_WRITE_NO_SNOOP, - AW_WRITE_UNIQUE, - AW_WRITE_LINE_UNIQUE, - AW_WRITE_CLEAN, - AW_WRITE_BACK, - AW_EVICT, - AW_WRITE_EVICT, - AW_BARRIER -} aw_snoop_e; - -aw_snoop_e aw_unsupported_ops[] = '{AW_WRITE_NO_SNOOP, AW_BARRIER}; - -/// The data transferred on a beat on the AW/AR channels. -class ace_ax_beat #( - parameter AW = 32, - parameter IW = 8 , - parameter UW = 1 -); - rand logic [IW-1:0] ax_id = '0; - rand logic [AW-1:0] ax_addr = '0; - logic [7:0] ax_len = '0; - logic [2:0] ax_size = '0; - logic [1:0] ax_burst = '0; - logic ax_lock = '0; - logic [3:0] ax_cache = '0; - logic [2:0] ax_prot = '0; - rand logic [3:0] ax_qos = '0; - logic [3:0] ax_region = '0; - logic [5:0] ax_atop = '0; // Only defined on the AW channel. 
- rand logic [UW-1:0] ax_user = '0; - rand logic [3:0] ax_snoop = '0; // AW channel requires 3 bits, AR channel requires 4 bits - rand logic [1:0] ax_bar = '0; - rand logic [1:0] ax_domain = '0; - rand logic ax_awunique = '0; // Only for AW -endclass - -/// The data transferred on a beat on the R channel. -class ace_r_beat #( - parameter DW = 32, - parameter IW = 8 , - parameter UW = 1 -); - rand logic [IW-1:0] r_id = '0; - rand logic [DW-1:0] r_data = '0; - ace_pkg::rresp_t r_resp = '0; - logic r_last = '0; - rand logic [UW-1:0] r_user = '0; -endclass - -/// The data transferred on a beat on the AC channel. -/// Plus an extra signal to determine data transfer -class ace_ac_beat #( - parameter AW = 32 -); - logic [AW-1:0] ac_addr = '0; - logic [3:0] ac_snoop = '0; - logic [2:0] ac_prot = '0; - logic data_transfer = '0; -endclass - -/// The data transferred on a beat on the CD channel. -class ace_cd_beat #( - parameter DW = 32 -); - rand logic [DW-1:0] cd_data = '0; - logic cd_last; -endclass - -/// The data transferred on a beat on the CR channel. 
-class ace_cr_beat; - ace_pkg::crresp_t cr_resp = '0; -endclass - -class ace_driver #( - parameter int AW = 32, - parameter int DW = 32, - parameter int AC_AW = AW, - parameter int CD_DW = DW, - parameter int IW = 8, - parameter int UW = 1, - parameter time TA = 0ns, // stimuli application time - parameter time TT = 0ns // stimuli test time -); - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH (AW), - .AXI_DATA_WIDTH (DW), - .AXI_ID_WIDTH (IW), - .AXI_USER_WIDTH (UW) - ) ace; - - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH (AC_AW), - .SNOOP_DATA_WIDTH (CD_DW) - ) snoop; - - typedef ace_ax_beat #(.AW(AW), .IW(IW), .UW(UW)) ax_beat_t; - typedef axi_w_beat #(.DW(DW), .UW(UW)) w_beat_t; - typedef axi_b_beat #(.IW(IW), .UW(UW)) b_beat_t; - typedef ace_r_beat #(.DW(DW), .IW(IW), .UW(UW)) r_beat_t; - typedef ace_ac_beat #(.AW(AC_AW)) ac_beat_t; - typedef ace_cd_beat #(.DW(CD_DW)) cd_beat_t; - typedef ace_cr_beat cr_beat_t; - - function new ( - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH (AW), - .AXI_DATA_WIDTH (DW), - .AXI_ID_WIDTH (IW), - .AXI_USER_WIDTH (UW) - ) ace, - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH (AC_AW), - .SNOOP_DATA_WIDTH (CD_DW) - ) snoop - ); - this.ace = ace; - this.snoop = snoop; - endfunction - - function void reset_master(); - ace.aw_id <= '0; - ace.aw_addr <= '0; - ace.aw_len <= '0; - ace.aw_size <= '0; - ace.aw_burst <= '0; - ace.aw_lock <= '0; - ace.aw_cache <= '0; - ace.aw_prot <= '0; - ace.aw_qos <= '0; - ace.aw_region <= '0; - ace.aw_atop <= '0; - ace.aw_user <= '0; - ace.aw_valid <= '0; - ace.aw_snoop <= '0; - ace.aw_bar <= '0; - ace.aw_domain <= '0; - ace.aw_awunique <= '0; - ace.w_data <= '0; - ace.w_strb <= '0; - ace.w_last <= '0; - ace.w_user <= '0; - ace.w_valid <= '0; - ace.b_ready <= '0; - ace.ar_id <= '0; - ace.ar_addr <= '0; - ace.ar_len <= '0; - ace.ar_size <= '0; - ace.ar_burst <= '0; - ace.ar_lock <= '0; - ace.ar_cache <= '0; - ace.ar_prot <= '0; - ace.ar_qos <= '0; - ace.ar_region <= '0; - ace.ar_user <= '0; - ace.ar_snoop <= '0; - 
ace.ar_bar <= '0; - ace.ar_domain <= '0; - ace.ar_valid <= '0; - ace.r_ready <= '0; - ace.wack <= '0; - ace.rack <= '0; - snoop.ac_ready <= '0; - snoop.cr_valid <= '0; - snoop.cr_resp <= '0; - snoop.cd_valid <= '0; - snoop.cd_data <= '0; - snoop.cd_last <= '0; - endfunction - - function void reset_slave(); - ace.aw_ready <= '0; - ace.w_ready <= '0; - ace.b_id <= '0; - ace.b_resp <= '0; - ace.b_user <= '0; - ace.b_valid <= '0; - ace.ar_ready <= '0; - ace.r_id <= '0; - ace.r_data <= '0; - ace.r_resp <= '0; - ace.r_last <= '0; - ace.r_user <= '0; - ace.r_valid <= '0; - snoop.ac_valid <= '0; - snoop.ac_addr <= '0; - snoop.ac_prot <= '0; - snoop.ac_snoop <= '0; - snoop.cr_ready <= '0; - snoop.cd_ready <= '0; - endfunction - - task cycle_start; - #TT; - endtask - - task cycle_end; - @(posedge ace.clk_i); - endtask - - /// Issue a beat on the AW channel. - task send_aw ( - input ax_beat_t beat - ); - ace.aw_id <= #TA beat.ax_id; - ace.aw_addr <= #TA beat.ax_addr; - ace.aw_len <= #TA beat.ax_len; - ace.aw_size <= #TA beat.ax_size; - ace.aw_burst <= #TA beat.ax_burst; - ace.aw_lock <= #TA beat.ax_lock; - ace.aw_cache <= #TA beat.ax_cache; - ace.aw_prot <= #TA beat.ax_prot; - ace.aw_qos <= #TA beat.ax_qos; - ace.aw_region <= #TA beat.ax_region; - ace.aw_atop <= #TA beat.ax_atop; - ace.aw_user <= #TA beat.ax_user; - ace.aw_valid <= #TA 1; - ace.aw_snoop <= #TA beat.ax_snoop; - ace.aw_bar <= #TA beat.ax_bar; - ace.aw_domain <= #TA beat.ax_domain; - ace.aw_awunique <= #TA beat.ax_awunique; - cycle_start(); - while (ace.aw_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.aw_id <= #TA '0; - ace.aw_addr <= #TA '0; - ace.aw_len <= #TA '0; - ace.aw_size <= #TA '0; - ace.aw_burst <= #TA '0; - ace.aw_lock <= #TA '0; - ace.aw_cache <= #TA '0; - ace.aw_prot <= #TA '0; - ace.aw_qos <= #TA '0; - ace.aw_region <= #TA '0; - ace.aw_atop <= #TA '0; - ace.aw_user <= #TA '0; - ace.aw_valid <= #TA 0; - ace.aw_snoop <= #TA '0; - ace.aw_bar <= #TA '0; - ace.aw_domain <= #TA 
'0; - ace.aw_awunique <= #TA 0; - endtask - - /// Issue a beat on the W channel. - task send_w ( - input w_beat_t beat - ); - ace.w_data <= #TA beat.w_data; - ace.w_strb <= #TA beat.w_strb; - ace.w_last <= #TA beat.w_last; - ace.w_user <= #TA beat.w_user; - ace.w_valid <= #TA 1; - cycle_start(); - while (ace.w_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.w_data <= #TA '0; - ace.w_strb <= #TA '0; - ace.w_last <= #TA '0; - ace.w_user <= #TA '0; - ace.w_valid <= #TA 0; - endtask - - /// Issue a beat on the B channel. - task send_b ( - input b_beat_t beat - ); - ace.b_id <= #TA beat.b_id; - ace.b_resp <= #TA beat.b_resp; - ace.b_user <= #TA beat.b_user; - ace.b_valid <= #TA 1; - cycle_start(); - while (ace.b_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.b_id <= #TA '0; - ace.b_resp <= #TA '0; - ace.b_user <= #TA '0; - ace.b_valid <= #TA 0; - cycle_start(); - while (ace.wack != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - endtask - - /// Issue a beat on the AR channel. 
- task send_ar ( - input ax_beat_t beat - ); - ace.ar_id <= #TA beat.ax_id; - ace.ar_addr <= #TA beat.ax_addr; - ace.ar_len <= #TA beat.ax_len; - ace.ar_size <= #TA beat.ax_size; - ace.ar_burst <= #TA beat.ax_burst; - ace.ar_lock <= #TA beat.ax_lock; - ace.ar_cache <= #TA beat.ax_cache; - ace.ar_prot <= #TA beat.ax_prot; - ace.ar_qos <= #TA beat.ax_qos; - ace.ar_region <= #TA beat.ax_region; - ace.ar_user <= #TA beat.ax_user; - ace.ar_valid <= #TA 1; - ace.ar_snoop <= #TA beat.ax_snoop; - ace.ar_bar <= #TA beat.ax_bar; - ace.ar_domain <= #TA beat.ax_domain; - cycle_start(); - while (ace.ar_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.ar_id <= #TA '0; - ace.ar_addr <= #TA '0; - ace.ar_len <= #TA '0; - ace.ar_size <= #TA '0; - ace.ar_burst <= #TA '0; - ace.ar_lock <= #TA '0; - ace.ar_cache <= #TA '0; - ace.ar_prot <= #TA '0; - ace.ar_qos <= #TA '0; - ace.ar_region <= #TA '0; - ace.ar_user <= #TA '0; - ace.ar_valid <= #TA 0; - ace.ar_snoop <= #TA '0; - ace.ar_bar <= #TA '0; - ace.ar_domain <= #TA '0; - endtask - - /// Issue a beat on the R channel. - task send_r ( - input r_beat_t beat - ); - ace.r_id <= #TA beat.r_id; - ace.r_data <= #TA beat.r_data; - ace.r_resp <= #TA beat.r_resp; - ace.r_last <= #TA beat.r_last; - ace.r_user <= #TA beat.r_user; - ace.r_valid <= #TA 1; - cycle_start(); - while (ace.r_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.r_id <= #TA '0; - ace.r_data <= #TA '0; - ace.r_resp <= #TA '0; - ace.r_last <= #TA '0; - ace.r_user <= #TA '0; - ace.r_valid <= #TA 0; - cycle_start(); - while (ace.rack != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - endtask - - /// Wait for a beat on the AW channel. 
- task recv_aw ( - output ax_beat_t beat - ); - ace.aw_ready <= #TA 1; - cycle_start(); - while (ace.aw_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.ax_id = ace.aw_id; - beat.ax_addr = ace.aw_addr; - beat.ax_len = ace.aw_len; - beat.ax_size = ace.aw_size; - beat.ax_burst = ace.aw_burst; - beat.ax_lock = ace.aw_lock; - beat.ax_cache = ace.aw_cache; - beat.ax_prot = ace.aw_prot; - beat.ax_qos = ace.aw_qos; - beat.ax_region = ace.aw_region; - beat.ax_atop = ace.aw_atop; - beat.ax_user = ace.aw_user; - beat.ax_snoop = ace.aw_snoop; - beat.ax_bar = ace.aw_bar; - beat.ax_domain = ace.aw_domain; - beat.ax_awunique = ace.aw_awunique; - cycle_end(); - ace.aw_ready <= #TA 0; - endtask - - /// Wait for a beat on the W channel. - task recv_w ( - output w_beat_t beat - ); - ace.w_ready <= #TA 1; - cycle_start(); - while (ace.w_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.w_data = ace.w_data; - beat.w_strb = ace.w_strb; - beat.w_last = ace.w_last; - beat.w_user = ace.w_user; - cycle_end(); - ace.w_ready <= #TA 0; - endtask - - /// Wait for a beat on the B channel. - task recv_b ( - output b_beat_t beat - ); - ace.b_ready <= #TA 1; - cycle_start(); - while (ace.b_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.b_id = ace.b_id; - beat.b_resp = ace.b_resp; - beat.b_user = ace.b_user; - cycle_end(); - ace.b_ready <= #TA 0; - ace.wack <= #TA 1; - cycle_start(); - ace.wack <= #TA 0; - endtask - - /// Wait for a beat on the AR channel. 
- task recv_ar ( - output ax_beat_t beat - ); - ace.ar_ready <= #TA 1; - cycle_start(); - while (ace.ar_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.ax_id = ace.ar_id; - beat.ax_addr = ace.ar_addr; - beat.ax_len = ace.ar_len; - beat.ax_size = ace.ar_size; - beat.ax_burst = ace.ar_burst; - beat.ax_lock = ace.ar_lock; - beat.ax_cache = ace.ar_cache; - beat.ax_prot = ace.ar_prot; - beat.ax_qos = ace.ar_qos; - beat.ax_region = ace.ar_region; - beat.ax_atop = 'X; // Not defined on the AR channel. - beat.ax_user = ace.ar_user; - beat.ax_snoop = ace.ar_snoop; - beat.ax_bar = ace.ar_bar; - beat.ax_domain = ace.ar_domain; - cycle_end(); - ace.ar_ready <= #TA 0; - endtask - - /// Wait for a beat on the R channel. - task recv_r ( - output r_beat_t beat - ); - ace.r_ready <= #TA 1; - cycle_start(); - while (ace.r_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.r_id = ace.r_id; - beat.r_data = ace.r_data; - beat.r_resp = ace.r_resp; - beat.r_last = ace.r_last; - beat.r_user = ace.r_user; - cycle_end(); - ace.r_ready <= #TA 0; - ace.rack <= #TA ace.r_last; - cycle_start(); - ace.rack <= #TA 0; - endtask - - /// Issue a beat on the AC channel. - task send_ac ( - input ac_beat_t beat - ); - snoop.ac_valid <= #TA 1; - snoop.ac_addr <= #TA beat.ac_addr; - snoop.ac_snoop <= #TA beat.ac_snoop; - snoop.ac_prot <= #TA beat.ac_prot; - cycle_start(); - while (snoop.ac_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - snoop.ac_valid <= #TA '0; - snoop.ac_addr <= #TA '0; - snoop.ac_snoop <= #TA '0; - snoop.ac_prot <= #TA '0; - endtask - - /// Issue a beat on the CR channel. - task send_cr ( - input cr_beat_t beat - ); - snoop.cr_valid <= #TA 1; - snoop.cr_resp <= #TA beat.cr_resp; - cycle_start(); - while (snoop.cr_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - snoop.cr_valid <= #TA '0; - snoop.cr_resp <= #TA '0; - endtask - - /// Issue a beat on the CD channel. 
- task send_cd ( - input cd_beat_t beat - ); - snoop.cd_valid <= #TA 1; - snoop.cd_data <= #TA beat.cd_data; - snoop.cd_last <= #TA beat.cd_last; - cycle_start(); - while (snoop.cd_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - snoop.cd_valid <= #TA '0; - snoop.cd_data <= #TA '0; - snoop.cd_last <= #TA '0; - endtask - - /// Wait for a beat on the AC channel. - task recv_ac ( - output ac_beat_t beat, - ref logic sim_done - ); - snoop.ac_ready <= #TA 1; - cycle_start(); - while ((snoop.ac_valid != 1) && !sim_done) begin - cycle_end(); cycle_start(); - end - if (!sim_done) begin - beat = new; - beat.ac_addr = snoop.ac_addr; - beat.ac_snoop = snoop.ac_snoop; - beat.ac_prot = snoop.ac_prot; - cycle_end(); - snoop.ac_ready <= #TA 0; - end - endtask - - /// Wait for a beat on the CR channel. - task recv_cr ( - output cr_beat_t beat - ); - snoop.cr_ready <= #TA 1; - cycle_start(); - while (snoop.cr_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.cr_resp = snoop.cr_resp; - cycle_end(); - snoop.cr_ready <= #TA 0; - endtask - - /// Wait for a beat on the CD channel. - task recv_cd ( - output cd_beat_t beat - ); - beat = new; - beat.cd_last = '0; - while (!beat.cd_last) begin - snoop.cd_ready <= #TA 1; - cycle_start(); - while (snoop.cd_valid != 1) begin cycle_end(); cycle_start(); end - beat.cd_data = snoop.cd_data; - beat.cd_last = snoop.cd_last; - cycle_end(); - snoop.cd_ready <= #TA 0; - end - endtask - - /// Monitor the AC channel and return the next beat. - task mon_ac ( - output ac_beat_t beat - ); - cycle_start(); - while (!(snoop.ac_valid && snoop.ac_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.ac_addr = snoop.ac_addr; - beat.ac_snoop = snoop.ac_snoop; - beat.ac_prot = snoop.ac_prot; - cycle_end(); - endtask - - /// Monitor the CR channel and return the next beat. 
- task mon_cr ( - output cr_beat_t beat - ); - cycle_start(); - while (!(snoop.cr_valid && snoop.cr_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.cr_resp = snoop.cr_resp; - cycle_end(); - endtask - - /// Monitor the CD channel and return the next beat. - task mon_cd ( - output cd_beat_t beat - ); - cycle_start(); - while (!(snoop.cd_valid && snoop.cd_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.cd_data = snoop.cd_data; - beat.cd_last = snoop.cd_last; - cycle_end(); - endtask - - /// Monitor the AW channel and return the next beat. - task mon_aw ( - output ax_beat_t beat - ); - cycle_start(); - while (!(ace.aw_valid && ace.aw_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.ax_id = ace.aw_id; - beat.ax_addr = ace.aw_addr; - beat.ax_len = ace.aw_len; - beat.ax_size = ace.aw_size; - beat.ax_burst = ace.aw_burst; - beat.ax_lock = ace.aw_lock; - beat.ax_cache = ace.aw_cache; - beat.ax_prot = ace.aw_prot; - beat.ax_qos = ace.aw_qos; - beat.ax_region = ace.aw_region; - beat.ax_atop = ace.aw_atop; - beat.ax_user = ace.aw_user; - beat.ax_snoop = ace.aw_snoop; - beat.ax_bar = ace.aw_bar; - beat.ax_domain = ace.aw_domain; - beat.ax_awunique = ace.aw_awunique; - cycle_end(); - endtask - - /// Monitor the W channel and return the next beat. - task mon_w ( - output w_beat_t beat - ); - cycle_start(); - while (!(ace.w_valid && ace.w_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.w_data = ace.w_data; - beat.w_strb = ace.w_strb; - beat.w_last = ace.w_last; - beat.w_user = ace.w_user; - cycle_end(); - endtask - - /// Monitor the B channel and return the next beat. - task mon_b ( - output b_beat_t beat - ); - cycle_start(); - while (!(ace.b_valid && ace.b_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.b_id = ace.b_id; - beat.b_resp = ace.b_resp; - beat.b_user = ace.b_user; - cycle_end(); - endtask - - /// Monitor the AR channel and return the next beat. 
- task mon_ar ( - output ax_beat_t beat - ); - cycle_start(); - while (!(ace.ar_valid && ace.ar_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.ax_id = ace.ar_id; - beat.ax_addr = ace.ar_addr; - beat.ax_len = ace.ar_len; - beat.ax_size = ace.ar_size; - beat.ax_burst = ace.ar_burst; - beat.ax_lock = ace.ar_lock; - beat.ax_cache = ace.ar_cache; - beat.ax_prot = ace.ar_prot; - beat.ax_qos = ace.ar_qos; - beat.ax_region = ace.ar_region; - beat.ax_atop = 'X; // Not defined on the AR channel. - beat.ax_user = ace.ar_user; - beat.ax_snoop = ace.ar_snoop; - beat.ax_bar = ace.ar_bar; - beat.ax_domain = ace.ar_domain; - cycle_end(); - endtask - - /// Monitor the R channel and return the next beat. - task mon_r ( - output r_beat_t beat - ); - cycle_start(); - while (!(ace.r_valid && ace.r_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.r_id = ace.r_id; - beat.r_data = ace.r_data; - beat.r_resp = ace.r_resp; - beat.r_last = ace.r_last; - beat.r_user = ace.r_user; - cycle_end(); - endtask - -endclass - -class ace_rand_master #( - // AXI interface parameters - parameter int AW = 32, - parameter int DW = 32, - parameter int IW = 8, - parameter int UW = 1, - // Snoop interface parameters - parameter int AC_AW = AW, // AC addr width - parameter int CD_DW = DW, // CD data width - // Stimuli application and test time - parameter time TA = 0ps, - parameter time TT = 0ps, - // Maximum number of read and write transactions in flight - parameter int MAX_READ_TXNS = 1, - parameter int MAX_WRITE_TXNS = 1, - // Upper and lower bounds on wait cycles on Ax, W, and resp (R and B) channels - parameter int AX_MIN_WAIT_CYCLES = 0, - parameter int AX_MAX_WAIT_CYCLES = 100, - parameter int W_MIN_WAIT_CYCLES = 0, - parameter int W_MAX_WAIT_CYCLES = 5, - parameter int RESP_MIN_WAIT_CYCLES = 0, - parameter int RESP_MAX_WAIT_CYCLES = 20, - // AXI feature usage - parameter int AXI_MAX_BURST_LEN = 0, // maximum number of beats in burst; 0 = AXI max (256) - parameter 
int TRAFFIC_SHAPING = 0, - parameter bit AXI_EXCLS = 1'b0, - parameter bit AXI_ATOPS = 1'b0, - parameter bit AXI_BURST_FIXED = 1'b0, - parameter bit AXI_BURST_INCR = 1'b1, - parameter bit AXI_BURST_WRAP = 1'b1, - parameter bit UNIQUE_IDS = 1'b0, // guarantee that the ID of each transaction is - // unique among all in-flight transactions in the - // same direction - parameter int AC_MIN_WAIT_CYCLES = 0, - parameter int AC_MAX_WAIT_CYCLES = 100, - parameter int CR_MIN_WAIT_CYCLES = 0, - parameter int CR_MAX_WAIT_CYCLES = 5, - parameter int CD_MIN_WAIT_CYCLES = 0, - parameter int CD_MAX_WAIT_CYCLES = 20, - - parameter int MEM_ADDR_SPACE = 8, // Address space for internal memory - parameter int CACHELINE_WIDTH = 0, // How many bytes in a cache line - - // Dependent parameters, do not override. - parameter int CACHELINE_WORD_SIZE = DW/8, // How many bytes in one word - parameter int AXI_STRB_WIDTH = DW/8, - parameter int N_AXI_IDS = 2**IW -); - - typedef ace_driver #( - .AW(AW), .DW(DW), .IW(IW), .UW(UW), .TA(TA), .TT(TT) - ) ace_driver_t; - - typedef logic [AW-1:0] addr_t; - typedef logic [MEM_ADDR_SPACE-1:0] mem_addr_t; - typedef logic [DW-1:0] data_t; - typedef logic [CD_DW-1:0] cd_data_t; - typedef logic [IW-1:0] id_t; - typedef logic [7:0] byte_t; - - // Internal "cache" memory - byte_t memory_q[mem_addr_t]; - - // Bitmask to check whether cache line boundary is crossed - static addr_t CLINE_BOUNDARY_MASK = ~((1 << $clog2(CACHELINE_WIDTH * 8)) - 1); - - localparam int CLINE_WIDTH_PER_DW = CACHELINE_WIDTH / (DW / 8); - localparam int CLINE_WIDTH_PER_CD_DW = CACHELINE_WIDTH / (CD_DW / 8); - - // Driver - ace_driver_t ace_drv; - - semaphore cnt_sem; - - // List of allowed burst types - axi_pkg::burst_t allowed_bursts[$]; - - // Max value for AxSIZE - localparam unsigned max_size = $clog2(DW); - // AxLEN for full cache line transaction - localparam unsigned cline_len = CACHELINE_WIDTH / CACHELINE_WORD_SIZE; - - int unsigned r_flight_cnt[N_AXI_IDS-1:0], - 
w_flight_cnt[N_AXI_IDS-1:0], - tot_r_flight_cnt, - tot_w_flight_cnt; - - ace_driver_t::ax_beat_t aw_ace_queue[$], w_queue[$]; - ace_driver_t::ac_beat_t ac_cr_queue[$], ac_cd_queue[$]; - - typedef struct packed { - addr_t addr_begin; - addr_t addr_end; - axi_pkg::mem_type_t mem_type; - } mem_region_t; - - mem_region_t mem_map[$]; - - function new( - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH (AW), - .AXI_DATA_WIDTH (DW), - .AXI_ID_WIDTH (IW), - .AXI_USER_WIDTH (UW) - ) ace, - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH (AW), - .SNOOP_DATA_WIDTH (DW) - ) snoop - ); - this.ace_drv = new(ace, snoop); - this.cnt_sem = new(1); - this.reset(); - if (AXI_BURST_FIXED) begin - this.allowed_bursts.push_back(axi_pkg::BURST_FIXED); - end - if (AXI_BURST_INCR) begin - this.allowed_bursts.push_back(axi_pkg::BURST_INCR); - end - if (AXI_BURST_WRAP) begin - this.allowed_bursts.push_back(axi_pkg::BURST_WRAP); - end - assert(allowed_bursts.size()) else $fatal(1, "At least one burst type has to be specified!"); - endfunction - - function void reset(); - ace_drv.reset_master(); - r_flight_cnt = '{default: 0}; - w_flight_cnt = '{default: 0}; - tot_r_flight_cnt = 0; - tot_w_flight_cnt = 0; - endfunction - - function void init_cache_memory(); - for (int addr = 0; addr < 2**MEM_ADDR_SPACE; addr++) begin - memory_q[addr] = $urandom(); - end - endfunction - - function void add_memory_region( - input addr_t addr_begin, - input addr_t addr_end, - input axi_pkg::mem_type_t mem_type - ); - mem_map.push_back({addr_begin, addr_end, mem_type}); - endfunction - - // Generate random AxSize that - // maps between allowed values - function axi_pkg::size_t gen_rand_size(); - automatic logic rand_success; - axi_pkg::size_t size; - rand_success = std::randomize(size) with { - size >= 0; - size <= max_size; - }; assert(rand_success); - return size; - endfunction - - // Generate random AxLen that - // maps between allowed values - // AxLEN cannot be wider than cache line width - function axi_pkg::len_t 
gen_rand_len( - input axi_pkg::size_t size, - input logic snoop_trs, - input axi_pkg::burst_t burst - ); - automatic logic rand_success; - axi_pkg::len_t len; - if (snoop_trs) begin - rand_success = std::randomize(len) with { - len inside {1, 2, 4, 8, 16}; - len <= cline_len; - }; assert(rand_success); - if ((burst == axi_pkg::BURST_WRAP) && (len == 1)) begin - // AxLEN 1 not allowed for wrap bursts - len = 2; - end - end else begin - if (burst == axi_pkg::BURST_WRAP) begin - rand_success = std::randomize(len) with { - len inside {2, 4, 8, 16}; - }; assert(rand_success); - end else begin - len = $urandom_range(1, 256); - end - end - return len; - endfunction - - function axi_pkg::burst_t get_rand_burst(); - automatic logic rand_success; - axi_pkg::burst_t burst; - rand_success = std::randomize(burst) with { - burst inside {this.allowed_bursts}; - }; assert(rand_success); - return burst; - endfunction - - function ace_driver_t::ax_beat_t new_rand_burst(input logic is_read); - - automatic ace_driver_t::ax_beat_t ax_ace_beat = new; - automatic axi_pkg::cache_t cache; - automatic axi_pkg::burst_t burst; - automatic id_t id; - automatic axi_pkg::qos_t qos; - automatic addr_t addr; - automatic axi_pkg::len_t len; - automatic axi_pkg::size_t size; - automatic ace_pkg::axbar_t bar; - automatic ace_pkg::axdomain_t domain; - automatic ace_pkg::arsnoop_t snoop; - automatic ace_pkg::awunique_t awunique; - automatic mem_region_t mem_region; - automatic ar_snoop_e ar_trs; - automatic aw_snoop_e aw_trs; - - logic snoop_trs, accepts_dirty, accepts_shared, accepts_dirty_shared; - - cache = axi_pkg::get_arcache(axi_pkg::DEVICE_BUFFERABLE); - burst = get_rand_burst(); - id = $urandom(); - qos = $urandom(); - - - // Most of ACE transactions are restricted to have - // a size of the data bus width - size = max_size; - - awunique = 1'b0; - snoop_trs = 1'b1; - - // Accepted RRESP responses - accepts_dirty = 1'b0; - accepts_shared = 1'b0; - accepts_dirty_shared = 1'b0; - - if (is_read) 
begin - // Read operation - std::randomize(ar_trs) with - { !(ar_trs inside {ar_unsupported_ops}); }; - case( ar_trs ) - AR_READ_NO_SNOOP: begin - snoop = ace_pkg::ReadNoSnoop; - domain = 'b00; - bar = 'b00; - snoop_trs = 1'b0; - size = gen_rand_size(); - len = gen_rand_len(size, snoop_trs, burst); - end - AR_READ_ONCE: begin - snoop = ace_pkg::ReadOnce; - domain = 'b01; - bar = 'b00; - size = gen_rand_size(); - len = gen_rand_len(size, snoop_trs, burst); - accepts_shared = 1'b1; - end - AR_READ_SHARED: begin - snoop = ace_pkg::ReadShared; - domain = 'b01; - bar = 'b00; - len = cline_len; - accepts_dirty = 1'b1; - accepts_dirty_shared = 1'b1; - accepts_shared = 1'b1; - end - AR_READ_CLEAN: begin - snoop = ace_pkg::ReadClean; - domain = 'b01; - bar = 'b00; - len = cline_len; - accepts_shared = 1'b1; - end - AR_READ_NOT_SHARED_DIRTY: begin - snoop = ace_pkg::ReadNotSharedDirty; - domain = 'b01; - bar = 'b00; - len = cline_len; - accepts_dirty = 1'b1; - accepts_shared = 1'b1; - end - AR_READ_UNIQUE: begin - snoop = ace_pkg::ReadUnique; - domain = 'b01; - bar = 'b00; - len = cline_len; - accepts_dirty = 1'b1; - end - AR_CLEAN_UNIQUE: begin - snoop = ace_pkg::CleanUnique; - domain = 'b01; - bar = 'b00; - len = cline_len; - end - AR_MAKE_UNIQUE: begin - snoop = ace_pkg::CleanUnique; - domain = 'b01; - bar = 'b00; - len = cline_len; - end - AR_CLEAN_SHARED: begin - snoop = ace_pkg::CleanShared; - domain = 'b01; - bar = 'b00; - len = cline_len; - accepts_shared = 1'b1; - end - AR_CLEAN_INVALID: begin - snoop = ace_pkg::CleanInvalid; - domain = 'b01; - bar = 'b00; - len = cline_len; - end - AR_MAKE_INVALID: begin - snoop = ace_pkg::MakeInvalid; - domain = 'b01; - bar = 'b00; - len = cline_len; - end - AR_BARRIER: begin - snoop = ace_pkg::Barrier; - domain = 'b01; - bar = 'b01; - len = cline_len; - end - AR_DVM_COMPLETE: begin - snoop = ace_pkg::DVMComplete; - domain = 'b01; - bar = 'b00; - len = cline_len; - end - AR_DVM_MESSAGE: begin - snoop = ace_pkg::DVMMessage; - 
domain = 'b01; - bar = 'b00; - len = cline_len; - end - default: begin - $error("Invalid snoop op enumeration."); - snoop = 'b0000; - domain = 'b00; - bar = 'b00; - len = $urandom(); - end - endcase - end else begin - // Write operation - std::randomize(aw_trs) with - { !(aw_trs inside {aw_unsupported_ops}); }; - case( aw_trs ) - AW_WRITE_NO_SNOOP: begin - snoop = ace_pkg::WriteNoSnoop; - domain = 'b00; - bar = 'b00; - snoop_trs = 1'b0; - size = gen_rand_size(); - len = $urandom(); - end - AW_WRITE_UNIQUE: begin - snoop = ace_pkg::WriteUnique; - domain = 'b01; - bar = 'b00; - size = gen_rand_size(); - len = gen_rand_len(size, snoop_trs, burst); - end - AW_WRITE_LINE_UNIQUE: begin - snoop = ace_pkg::WriteLineUnique; - domain = 'b01; - bar = 'b00; - len = cline_len; - end - AW_WRITE_CLEAN: begin - snoop = ace_pkg::WriteClean; - domain = 'b01; - bar = 'b00; - len = cline_len; - end - AW_WRITE_BACK: begin - snoop = ace_pkg::WriteBack; - domain = 'b01; - bar = 'b00; - len = cline_len; - end - AW_EVICT: begin - snoop = ace_pkg::Evict; - domain = 'b01; - bar = 'b00; - len = cline_len; - end - AW_WRITE_EVICT: begin - snoop = ace_pkg::WriteEvict; - domain = 'b01; - bar = 'b00; - len = cline_len; - end - AR_MAKE_UNIQUE: begin - snoop = ace_pkg::CleanUnique; - domain = 'b01; - bar = 'b00; - len = cline_len; - end - AW_BARRIER: begin - snoop = ace_pkg::Barrier; - domain = 'b01; - bar = 'b01; - len = cline_len; - end - default: begin - $error("Invalid snoop op enumeration."); - snoop = 'b0000; - domain = 'b00; - bar = 'b00; - len = $urandom(); - end - endcase - end - - mem_region = '{ - addr_begin: '0, - addr_end: '1, - mem_type: axi_pkg::NORMAL_NONCACHEABLE_BUFFERABLE - }; - - forever begin - // Randomize address - addr = $urandom_range(mem_region.addr_begin, mem_region.addr_end); - addr[AXI_STRB_WIDTH:0] = '0; // align address to word boundary - if (snoop_trs) begin - if (burst == axi_pkg::BURST_FIXED) begin - $error("FIXED type burst not allowed!"); - end else if (burst == 
axi_pkg::BURST_INCR) begin - // Assert that transaction does not cross cache line boundary - if (((addr + ((2**size * len)-1)) & CLINE_BOUNDARY_MASK) == (addr & CLINE_BOUNDARY_MASK)) begin - break; - end - end else begin - // WRAP bursts should be fine in all situations - break; - end - end else begin - break; - end - end - - ax_ace_beat.ax_addr = addr; - ax_ace_beat.ax_burst = burst; - ax_ace_beat.ax_size = size; - ax_ace_beat.ax_len = len - 1; - ax_ace_beat.ax_id = id; - ax_ace_beat.ax_qos = qos; - ax_ace_beat.ax_snoop = snoop; - ax_ace_beat.ax_bar = bar; - ax_ace_beat.ax_domain = domain; - ax_ace_beat.ax_awunique = awunique; - - return ax_ace_beat; - - endfunction - - // TODO: The `rand_wait` task exists in `rand_verif_pkg`, but that task cannot be called with - // `this.drv.ace.clk_i` as `clk` argument. What is the syntax getting an assignable reference? - task automatic rand_wait(input int unsigned min, max); - int unsigned rand_success, cycles; - cycles = $urandom_range(min,max); - // rand_success = std::randomize(cycles) with { - // cycles >= min; - // cycles <= max; - // }; - // assert (rand_success) else $error("Failed to randomize wait cycles!"); - repeat (cycles) @(posedge this.ace_drv.ace.clk_i); - endtask - - task send_ars(input int n_reads); - automatic logic rand_success; - repeat (n_reads) begin - automatic id_t id; - automatic ace_driver_t::ax_beat_t ar_ace_beat = new_rand_burst(1'b1); - while (tot_r_flight_cnt >= MAX_READ_TXNS) begin - rand_wait(1, 1); - end - tot_r_flight_cnt++; - rand_wait(AX_MIN_WAIT_CYCLES, AX_MAX_WAIT_CYCLES); - ace_drv.send_ar(ar_ace_beat); - end - $info("Finish ARs"); - endtask - - task recv_rs(ref logic ar_done); - while (!(ar_done && tot_r_flight_cnt == 0)) begin - automatic ace_driver_t::r_beat_t r_ace_beat; - rand_wait(RESP_MIN_WAIT_CYCLES, RESP_MAX_WAIT_CYCLES); - if (tot_r_flight_cnt > 0) begin - ace_drv.recv_r(r_ace_beat); - if (r_ace_beat.r_last) begin - cnt_sem.get(); - r_flight_cnt[r_ace_beat.r_id]--; - 
tot_r_flight_cnt--; - cnt_sem.put(); - end - end - end - $info("Finish Rs"); - endtask - - task create_aws(input int n_writes); - automatic logic rand_success; - repeat (n_writes) begin - automatic bit excl = 1'b0; - automatic ace_driver_t::ax_beat_t aw_ace_beat; - aw_ace_beat = new_rand_burst(1'b0); - while (tot_w_flight_cnt >= MAX_WRITE_TXNS) begin - rand_wait(1, 1); - end - tot_w_flight_cnt++; - aw_ace_queue.push_back(aw_ace_beat); - w_queue.push_back(aw_ace_beat); - end - $info("Finish AW creates"); - endtask - - task send_aws(ref logic aw_done); - while (!(aw_done && aw_ace_queue.size() == 0)) begin - automatic ace_driver_t::ax_beat_t aw_ace_beat; - wait (aw_ace_queue.size() > 0 || (aw_done && aw_ace_queue.size() == 0)); - aw_ace_beat = aw_ace_queue.pop_front(); - rand_wait(AX_MIN_WAIT_CYCLES, AX_MAX_WAIT_CYCLES); - ace_drv.send_aw(aw_ace_beat); - end - $info("Finish AW sends"); - endtask - - task send_ws(ref logic aw_done); - while (!(aw_done && w_queue.size() == 0)) begin - automatic ace_driver_t::ax_beat_t aw_ace_beat; - automatic addr_t addr; - static logic rand_success; - wait (w_queue.size() > 0 || (aw_done && w_queue.size() == 0)); - aw_ace_beat = w_queue.pop_front(); - for (int unsigned i = 0; i < aw_ace_beat.ax_len + 1; i++) begin - automatic ace_driver_t::w_beat_t w_beat = new; - automatic int unsigned begin_byte, end_byte, n_bytes; - automatic logic [AXI_STRB_WIDTH-1:0] rand_strb, strb_mask; - addr = axi_pkg::beat_addr(aw_ace_beat.ax_addr, aw_ace_beat.ax_size, aw_ace_beat.ax_len, - aw_ace_beat.ax_burst, i); - //rand_success = w_beat.randomize(); assert (rand_success); - // Determine strobe. 
- w_beat.w_strb = '0; - n_bytes = 2**aw_ace_beat.ax_size; - begin_byte = addr % AXI_STRB_WIDTH; - end_byte = ((begin_byte + n_bytes) >> aw_ace_beat.ax_size) << aw_ace_beat.ax_size; - strb_mask = '0; - for (int unsigned b = begin_byte; b < end_byte; b++) - strb_mask[b] = 1'b1; - rand_strb = $urandom(); - //rand_success = std::randomize(rand_strb); assert (rand_success); - w_beat.w_strb |= (rand_strb & strb_mask); - // Determine last. - w_beat.w_last = (i == aw_ace_beat.ax_len); - rand_wait(W_MIN_WAIT_CYCLES, W_MAX_WAIT_CYCLES); - ace_drv.send_w(w_beat); - end - end - $info("Finish Ws"); - endtask - - task recv_bs(ref logic aw_done); - while (!(aw_done && tot_w_flight_cnt == 0)) begin - automatic ace_driver_t::b_beat_t b_beat; - rand_wait(RESP_MIN_WAIT_CYCLES, RESP_MAX_WAIT_CYCLES); - ace_drv.recv_b(b_beat); - cnt_sem.get(); - w_flight_cnt[b_beat.b_id]--; - tot_w_flight_cnt--; - cnt_sem.put(); - end - $info("Finish Bs"); - endtask - - task recv_acs(ref logic sim_done); - while (!sim_done) begin - automatic ace_driver_t::ac_beat_t ace_ac_beat; - rand_wait(AC_MIN_WAIT_CYCLES, AC_MAX_WAIT_CYCLES); - ace_drv.recv_ac(ace_ac_beat, sim_done); - if (!sim_done) begin - // Determine randomly already here whether this AC causes datatransfer - // Ideally, this would be replaced by looking up the internal cache memory - ace_ac_beat.data_transfer = $urandom_range(0,1); - ac_cr_queue.push_back(ace_ac_beat); - ac_cd_queue.push_back(ace_ac_beat); - end - end - $info("Finish ACs"); - endtask - - task send_crs(ref logic sim_done); - while (!sim_done) begin - automatic logic rand_success; - automatic ace_driver_t::ac_beat_t ace_ac_beat; - automatic ace_driver_t::cr_beat_t ace_cr_beat = new; - wait ((ac_cr_queue.size() > 0) || sim_done); - if (ac_cr_queue.size() > 0) begin - ace_ac_beat = ac_cr_queue.pop_front(); - ace_cr_beat.cr_resp[4:2] = $urandom_range(0,3'b111);//$urandom_range(0,5'b11111); - ace_cr_beat.cr_resp[1] = 1'b0; - ace_cr_beat.cr_resp[0] = ace_ac_beat.data_transfer; - 
rand_wait(CR_MIN_WAIT_CYCLES, CR_MAX_WAIT_CYCLES); - ace_drv.send_cr(ace_cr_beat); - end - end - $info("CR done"); - endtask - - task send_cds(ref logic sim_done); - while (!sim_done) begin - automatic logic rand_success; - automatic ace_driver_t::ac_beat_t ace_ac_beat; - automatic ace_driver_t::cd_beat_t ace_cd_beat = new; - automatic addr_t byte_addr; - automatic mem_addr_t mem_addr; - automatic cd_data_t cd_word; - wait ((ac_cd_queue.size() > 0) || sim_done); - if (ac_cd_queue.size() > 0) begin - ace_ac_beat = ac_cd_queue.pop_front(); - // If data transfer, send CD data. Otherwise, ignore. - if (ace_ac_beat.data_transfer) begin - mem_addr = ace_ac_beat.ac_addr[MEM_ADDR_SPACE-1:0]; - for (int i = 0; i < CLINE_WIDTH_PER_CD_DW; i++) begin - for (int j = 0; j < (CD_DW / 8); j++) begin - // Compose CD word that is CD_DW bits wide - cd_word[j*(CD_DW/8) +: 8] = memory_q[mem_addr+(i*(CD_DW/8)+j)]; - end - // random response - ace_cd_beat.cd_data = cd_word; - if (i == (CLINE_WIDTH_PER_CD_DW - 1)) begin - ace_cd_beat.cd_last = 1'b1; - end else begin - ace_cd_beat.cd_last = 1'b0; - end - rand_wait(CD_MIN_WAIT_CYCLES, CD_MAX_WAIT_CYCLES); - ace_drv.send_cd(ace_cd_beat); - end - end - end - end - $info("CD done"); - endtask - - task sim_done_task(ref logic first, ref logic second); - forever begin - if (first && second) begin - break; - end - #TT; - end - endtask - - // Issue n_reads random read and n_writes random - // write transactions to an address range. 
- task run(input int n_reads, input int n_writes); - automatic logic ar_done = 1'b0, - aw_done = 1'b0, - b_done = 1'b0, - r_done = 1'b0, - sim_done = 1'b0; - fork - begin - send_ars(n_reads); - ar_done = 1'b1; - end - begin - recv_rs(ar_done); - r_done = 1'b1; - end - begin - create_aws(n_writes); - aw_done = 1'b1; - end - send_aws(aw_done); - send_ws(aw_done); - begin - recv_bs(aw_done); - b_done = 1'b1; - end - begin - sim_done_task(r_done, b_done); - sim_done = 1'b1; - end - recv_acs(sim_done); - send_crs(sim_done); - send_cds(sim_done); - join - endtask - -endclass - -// Datatype for storing the data that was transferred in -// an AXI transaction -class axi_transaction #( - /// AXI4+ATOP address width - parameter int unsigned AW = 0, - /// AXI4+ATOP data width - parameter int unsigned DW = 0, -); - rand bit [AW-1:0] address; - rand bit [DW-1:0] data; - rand bit write_en; - - function new(); - address = 0; - data = 0; - write_en = 0; - endfunction - -endclass - - -class ace_monitor #( - /// AXI4+ATOP ID width - parameter int unsigned IW = 0, - /// AXI4+ATOP address width - parameter int unsigned AW = 0, - /// AXI4+ATOP data width - parameter int unsigned DW = 0, - /// AXI4+ATOP user width - parameter int unsigned UW = 0, - /// Stimuli test time - parameter time TT = 0ns -); - - typedef axi_transaction #( - .AW(AW), .DW(DW) - ) axi_txn_t; - - typedef ace_driver #( - .AW(AW), .DW(DW), .IW(IW), .UW(UW), .TA(TT), .TT(TT) - ) ace_driver_t; - - ace_driver_t ace_drv; - axi_txn_t axi_txn; - event new_axi_txn_event; - - ace_driver_t::ax_beat_t new_ax_transaction; - - mailbox aw_mbx = new, w_mbx = new, b_mbx = new, - ar_mbx = new, r_mbx = new; - - function new( - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH(AW), - .AXI_DATA_WIDTH(DW), - .AXI_ID_WIDTH(IW), - .AXI_USER_WIDTH(UW) - ) ace, - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH (AW), - .SNOOP_DATA_WIDTH (DW) - ) snoop - ); - this.ace_drv = new(ace, snoop); - this.new_axi_txn_event = new(); - endfunction - - task monitor; - 
fork - // AW - forever begin - automatic ace_driver_t::ax_beat_t ax; - this.ace_drv.mon_aw(ax); - aw_mbx.put(ax); - end - // W - forever begin - automatic w_beat_t w; - this.drv.mon_w(w); - w_mbx.put(w); - end - // B - forever begin - automatic b_beat_t b; - this.drv.mon_b(b); - b_mbx.put(b); - end - // AR - forever begin - automatic ax_beat_t ax; - this.drv.mon_ar(ax); - ar_mbx.put(ax); - end - // R - forever begin - automatic r_beat_t r; - this.drv.mon_r(r); - r_mbx.put(r); - -> txn_event; - end - join - endtask - -endclass - - -class ace_scoreboard #( - /// AXI4+ATOP ID width - parameter int unsigned IW = 0, - /// AXI4+ATOP address width - parameter int unsigned AW = 0, - /// AXI4+ATOP data width - parameter int unsigned DW = 0, - /// AXI4+ATOP user width - parameter int unsigned UW = 0, - /// Stimuli test time - parameter time TT = 0ns -); - - typedef axi_transaction #( - .AW(AW), .DW(DW) - ) axi_txn_t; - - ref event new_axi_txn_event; - ref axi_txn_t axi_txn; - - // Monitor interface - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH ( AW ), - .AXI_DATA_WIDTH ( DW ), - .AXI_ID_WIDTH ( IW ), - .AXI_USER_WIDTH ( UW ) - ) ace; - - /// New constructor - function new( - ref event e, - ref axi_txn_t t - ); - this.new_axi_txn_event = e; - this.axi_txn = t; - endfunction - -endpackage \ No newline at end of file diff --git a/src/deprecated/ace_test.sv b/src/deprecated/ace_test.sv deleted file mode 100644 index 125f626..0000000 --- a/src/deprecated/ace_test.sv +++ /dev/null @@ -1,1873 +0,0 @@ -// Copyright (c) 2014-2018 ETH Zurich, University of Bologna -// Copyright (c) 2022 PlanV GmbH -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// - - -/// A set of testbench utilities for ACE interfaces. -package ace_test; - - import axi_pkg::*; - import ace_pkg::*; - - typedef enum logic [3:0] { - AR_READ_NO_SNOOP, - AR_READ_ONCE, - AR_READ_SHARED, - AR_READ_CLEAN, - AR_READ_NOT_SHARED_DIRTY, - AR_READ_UNIQUE, - AR_CLEAN_UNIQUE, - AR_MAKE_UNIQUE, - AR_CLEAN_SHARED, - AR_CLEAN_INVALID, - AR_MAKE_INVALID, - AR_BARRIER, - AR_DVM_COMPLETE, - AR_DVM_MESSAGE - } ar_snoop_e; - - ar_snoop_e ar_unsupported_ops[] = '{AR_READ_NO_SNOOP, AR_BARRIER, AR_DVM_COMPLETE, AR_DVM_MESSAGE}; - - typedef enum logic [2:0] { - AW_WRITE_NO_SNOOP, - AW_WRITE_UNIQUE, - AW_WRITE_LINE_UNIQUE, - AW_WRITE_CLEAN, - AW_WRITE_BACK, - AW_EVICT, - AW_WRITE_EVICT, - AW_BARRIER - } aw_snoop_e; - - aw_snoop_e aw_unsupported_ops[] = '{AW_BARRIER}; - - /// The data transferred on a beat on the AW/AR channels. - class ace_ax_beat #( - parameter AW = 32, - parameter IW = 8 , - parameter UW = 1 - ); - rand logic [IW-1:0] ax_id = '0; - rand logic [AW-1:0] ax_addr = '0; - logic [7:0] ax_len = '0; - logic [2:0] ax_size = '0; - logic [1:0] ax_burst = '0; - logic ax_lock = '0; - logic [3:0] ax_cache = '0; - logic [2:0] ax_prot = '0; - rand logic [3:0] ax_qos = '0; - logic [3:0] ax_region = '0; - logic [5:0] ax_atop = '0; // Only defined on the AW channel. - rand logic [UW-1:0] ax_user = '0; - rand logic [3:0] ax_snoop = '0; // AW channel requires 3 bits, AR channel requires 4 bits - rand logic [1:0] ax_bar = '0; - rand logic [1:0] ax_domain = '0; - rand logic ax_awunique = '0; // Only for AW - endclass - - /// The data transferred on a beat on the R channel. 
- class ace_r_beat #( - parameter DW = 32, - parameter IW = 8 , - parameter UW = 1 - ); - rand logic [IW-1:0] r_id = '0; - rand logic [DW-1:0] r_data = '0; - ace_pkg::rresp_t r_resp = '0; - logic r_last = '0; - rand logic [UW-1:0] r_user = '0; - endclass - - /// The data transferred on a beat on the W channel. - class axi_w_beat #( - parameter DW = 32, - parameter UW = 1 - ); - rand logic [DW-1:0] w_data = '0; - rand logic [DW/8-1:0] w_strb = '0; - logic w_last = '0; - rand logic [UW-1:0] w_user = '0; -endclass - - /// The data transferred on a beat on the B channel. -class axi_b_beat #( - parameter IW = 8, - parameter UW = 1 - ); - rand logic [IW-1:0] b_id = '0; - axi_pkg::resp_t b_resp = '0; - rand logic [UW-1:0] b_user = '0; -endclass - - - /// A driver for AXI4 interface. - class ace_driver #( - parameter int AW = 32 , - parameter int DW = 32 , - parameter int IW = 8 , - parameter int UW = 1 , - parameter time TA = 0ns , // stimuli application time - parameter time TT = 0ns // stimuli test time - ); - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH(AW), - .AXI_DATA_WIDTH(DW), - .AXI_ID_WIDTH(IW), - .AXI_USER_WIDTH(UW) - ) ace; - -// typedef axi_test::axi_driver #( -// .AW(AW), .DW(DW), .IW(IW), .UW(UW), .TA(TA), .TT(TT) -// ) axi_driver_t; - typedef ace_ax_beat #(.AW(AW), .IW(IW), .UW(UW)) ax_ace_beat_t; - typedef axi_w_beat #(.DW(DW), .UW(UW)) w_beat_t; - typedef axi_b_beat #(.IW(IW), .UW(UW)) b_beat_t; -// typedef axi_driver_t::w_beat_t w_beat_t; -// typedef axi_driver_t::b_beat_t b_beat_t; - typedef ace_r_beat #(.DW(DW), .IW(IW), .UW(UW)) r_ace_beat_t; - - function new( - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH(AW), - .AXI_DATA_WIDTH(DW), - .AXI_ID_WIDTH(IW), - .AXI_USER_WIDTH(UW) - ) ace - ); - this.ace = ace; - endfunction - - function void reset_master(); - ace.aw_id <= '0; - ace.aw_addr <= '0; - ace.aw_len <= '0; - ace.aw_size <= '0; - ace.aw_burst <= '0; - ace.aw_lock <= '0; - ace.aw_cache <= '0; - ace.aw_prot <= '0; - ace.aw_qos <= '0; - ace.aw_region <= '0; - 
ace.aw_atop <= '0; - ace.aw_user <= '0; - ace.aw_valid <= '0; - ace.aw_snoop <= '0; - ace.aw_bar <= '0; - ace.aw_domain <= '0; - ace.aw_awunique <= '0; - ace.w_data <= '0; - ace.w_strb <= '0; - ace.w_last <= '0; - ace.w_user <= '0; - ace.w_valid <= '0; - ace.b_ready <= '0; - ace.ar_id <= '0; - ace.ar_addr <= '0; - ace.ar_len <= '0; - ace.ar_size <= '0; - ace.ar_burst <= '0; - ace.ar_lock <= '0; - ace.ar_cache <= '0; - ace.ar_prot <= '0; - ace.ar_qos <= '0; - ace.ar_region <= '0; - ace.ar_user <= '0; - ace.ar_snoop <= '0; - ace.ar_bar <= '0; - ace.ar_domain <= '0; - ace.ar_valid <= '0; - ace.r_ready <= '0; - ace.wack <= '0; - ace.rack <= '0; - endfunction - - function void reset_slave(); - ace.aw_ready <= '0; - ace.w_ready <= '0; - ace.b_id <= '0; - ace.b_resp <= '0; - ace.b_user <= '0; - ace.b_valid <= '0; - ace.ar_ready <= '0; - ace.r_id <= '0; - ace.r_data <= '0; - ace.r_resp <= '0; - ace.r_last <= '0; - ace.r_user <= '0; - ace.r_valid <= '0; - endfunction - - task cycle_start; - #TT; - endtask - - task cycle_end; - @(posedge ace.clk_i); - endtask - - /// Issue a beat on the AW channel. 
- task send_aw ( - input ax_ace_beat_t beat - ); - ace.aw_id <= #TA beat.ax_id; - ace.aw_addr <= #TA beat.ax_addr; - ace.aw_len <= #TA beat.ax_len; - ace.aw_size <= #TA beat.ax_size; - ace.aw_burst <= #TA beat.ax_burst; - ace.aw_lock <= #TA beat.ax_lock; - ace.aw_cache <= #TA beat.ax_cache; - ace.aw_prot <= #TA beat.ax_prot; - ace.aw_qos <= #TA beat.ax_qos; - ace.aw_region <= #TA beat.ax_region; - ace.aw_atop <= #TA beat.ax_atop; - ace.aw_user <= #TA beat.ax_user; - ace.aw_valid <= #TA 1; - ace.aw_snoop <= #TA beat.ax_snoop; - ace.aw_bar <= #TA beat.ax_bar; - ace.aw_domain <= #TA beat.ax_domain; - ace.aw_awunique <= #TA beat.ax_awunique; - cycle_start(); - while (ace.aw_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.aw_id <= #TA '0; - ace.aw_addr <= #TA '0; - ace.aw_len <= #TA '0; - ace.aw_size <= #TA '0; - ace.aw_burst <= #TA '0; - ace.aw_lock <= #TA '0; - ace.aw_cache <= #TA '0; - ace.aw_prot <= #TA '0; - ace.aw_qos <= #TA '0; - ace.aw_region <= #TA '0; - ace.aw_atop <= #TA '0; - ace.aw_user <= #TA '0; - ace.aw_valid <= #TA 0; - ace.aw_snoop <= #TA '0; - ace.aw_bar <= #TA '0; - ace.aw_domain <= #TA '0; - ace.aw_awunique <= #TA 0; - endtask - - /// Issue a beat on the W channel. - task send_w ( - input w_beat_t beat - ); - ace.w_data <= #TA beat.w_data; - ace.w_strb <= #TA beat.w_strb; - ace.w_last <= #TA beat.w_last; - ace.w_user <= #TA beat.w_user; - ace.w_valid <= #TA 1; - cycle_start(); - while (ace.w_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.w_data <= #TA '0; - ace.w_strb <= #TA '0; - ace.w_last <= #TA '0; - ace.w_user <= #TA '0; - ace.w_valid <= #TA 0; - endtask - - /// Issue a beat on the B channel. 
- task send_b ( - input b_beat_t beat - ); - ace.b_id <= #TA beat.b_id; - ace.b_resp <= #TA beat.b_resp; - ace.b_user <= #TA beat.b_user; - ace.b_valid <= #TA 1; - cycle_start(); - while (ace.b_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.b_id <= #TA '0; - ace.b_resp <= #TA '0; - ace.b_user <= #TA '0; - ace.b_valid <= #TA 0; - cycle_start(); - while (ace.wack != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - endtask - - /// Issue a beat on the AR channel. - task send_ar ( - input ax_ace_beat_t beat - ); - ace.ar_id <= #TA beat.ax_id; - ace.ar_addr <= #TA beat.ax_addr; - ace.ar_len <= #TA beat.ax_len; - ace.ar_size <= #TA beat.ax_size; - ace.ar_burst <= #TA beat.ax_burst; - ace.ar_lock <= #TA beat.ax_lock; - ace.ar_cache <= #TA beat.ax_cache; - ace.ar_prot <= #TA beat.ax_prot; - ace.ar_qos <= #TA beat.ax_qos; - ace.ar_region <= #TA beat.ax_region; - ace.ar_user <= #TA beat.ax_user; - ace.ar_valid <= #TA 1; - ace.ar_snoop <= #TA beat.ax_snoop; - ace.ar_bar <= #TA beat.ax_bar; - ace.ar_domain <= #TA beat.ax_domain; - cycle_start(); - while (ace.ar_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.ar_id <= #TA '0; - ace.ar_addr <= #TA '0; - ace.ar_len <= #TA '0; - ace.ar_size <= #TA '0; - ace.ar_burst <= #TA '0; - ace.ar_lock <= #TA '0; - ace.ar_cache <= #TA '0; - ace.ar_prot <= #TA '0; - ace.ar_qos <= #TA '0; - ace.ar_region <= #TA '0; - ace.ar_user <= #TA '0; - ace.ar_valid <= #TA 0; - ace.ar_snoop <= #TA '0; - ace.ar_bar <= #TA '0; - ace.ar_domain <= #TA '0; - endtask - - /// Issue a beat on the R channel. 
- task send_r ( - input r_ace_beat_t beat - ); - ace.r_id <= #TA beat.r_id; - ace.r_data <= #TA beat.r_data; - ace.r_resp <= #TA beat.r_resp; - ace.r_last <= #TA beat.r_last; - ace.r_user <= #TA beat.r_user; - ace.r_valid <= #TA 1; - cycle_start(); - while (ace.r_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.r_id <= #TA '0; - ace.r_data <= #TA '0; - ace.r_resp <= #TA '0; - ace.r_last <= #TA '0; - ace.r_user <= #TA '0; - ace.r_valid <= #TA 0; - cycle_start(); - while (ace.rack != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - endtask - - /// Wait for a beat on the AW channel. - task recv_aw ( - output ax_ace_beat_t beat - ); - ace.aw_ready <= #TA 1; - cycle_start(); - while (ace.aw_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.ax_id = ace.aw_id; - beat.ax_addr = ace.aw_addr; - beat.ax_len = ace.aw_len; - beat.ax_size = ace.aw_size; - beat.ax_burst = ace.aw_burst; - beat.ax_lock = ace.aw_lock; - beat.ax_cache = ace.aw_cache; - beat.ax_prot = ace.aw_prot; - beat.ax_qos = ace.aw_qos; - beat.ax_region = ace.aw_region; - beat.ax_atop = ace.aw_atop; - beat.ax_user = ace.aw_user; - beat.ax_snoop = ace.aw_snoop; - beat.ax_bar = ace.aw_bar; - beat.ax_domain = ace.aw_domain; - beat.ax_awunique = ace.aw_awunique; - cycle_end(); - ace.aw_ready <= #TA 0; - endtask - - /// Wait for a beat on the W channel. - task recv_w ( - output w_beat_t beat - ); - ace.w_ready <= #TA 1; - cycle_start(); - while (ace.w_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.w_data = ace.w_data; - beat.w_strb = ace.w_strb; - beat.w_last = ace.w_last; - beat.w_user = ace.w_user; - cycle_end(); - ace.w_ready <= #TA 0; - endtask - - /// Wait for a beat on the B channel. 
- task recv_b ( - output b_beat_t beat - ); - ace.b_ready <= #TA 1; - cycle_start(); - while (ace.b_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.b_id = ace.b_id; - beat.b_resp = ace.b_resp; - beat.b_user = ace.b_user; - cycle_end(); - ace.b_ready <= #TA 0; - ace.wack <= #TA 1; - cycle_start(); - ace.wack <= #TA 0; - endtask - - /// Wait for a beat on the AR channel. - task recv_ar ( - output ax_ace_beat_t beat - ); - ace.ar_ready <= #TA 1; - cycle_start(); - while (ace.ar_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.ax_id = ace.ar_id; - beat.ax_addr = ace.ar_addr; - beat.ax_len = ace.ar_len; - beat.ax_size = ace.ar_size; - beat.ax_burst = ace.ar_burst; - beat.ax_lock = ace.ar_lock; - beat.ax_cache = ace.ar_cache; - beat.ax_prot = ace.ar_prot; - beat.ax_qos = ace.ar_qos; - beat.ax_region = ace.ar_region; - beat.ax_atop = 'X; // Not defined on the AR channel. - beat.ax_user = ace.ar_user; - beat.ax_snoop = ace.ar_snoop; - beat.ax_bar = ace.ar_bar; - beat.ax_domain = ace.ar_domain; - cycle_end(); - ace.ar_ready <= #TA 0; - endtask - - /// Wait for a beat on the R channel. - task recv_r ( - output r_ace_beat_t beat - ); - ace.r_ready <= #TA 1; - cycle_start(); - while (ace.r_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.r_id = ace.r_id; - beat.r_data = ace.r_data; - beat.r_resp = ace.r_resp; - beat.r_last = ace.r_last; - beat.r_user = ace.r_user; - cycle_end(); - ace.r_ready <= #TA 0; - ace.rack <= #TA ace.r_last; - cycle_start(); - ace.rack <= #TA 0; - endtask - - /// Monitor the AW channel and return the next beat. 
- task mon_aw ( - output ax_ace_beat_t beat - ); - cycle_start(); - while (!(ace.aw_valid && ace.aw_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.ax_id = ace.aw_id; - beat.ax_addr = ace.aw_addr; - beat.ax_len = ace.aw_len; - beat.ax_size = ace.aw_size; - beat.ax_burst = ace.aw_burst; - beat.ax_lock = ace.aw_lock; - beat.ax_cache = ace.aw_cache; - beat.ax_prot = ace.aw_prot; - beat.ax_qos = ace.aw_qos; - beat.ax_region = ace.aw_region; - beat.ax_atop = ace.aw_atop; - beat.ax_user = ace.aw_user; - beat.ax_snoop = ace.aw_snoop; - beat.ax_bar = ace.aw_bar; - beat.ax_domain = ace.aw_domain; - beat.ax_awunique = ace.aw_awunique; - cycle_end(); - endtask - - /// Monitor the W channel and return the next beat. - task mon_w ( - output w_beat_t beat - ); - cycle_start(); - while (!(ace.w_valid && ace.w_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.w_data = ace.w_data; - beat.w_strb = ace.w_strb; - beat.w_last = ace.w_last; - beat.w_user = ace.w_user; - cycle_end(); - endtask - - /// Monitor the B channel and return the next beat. - task mon_b ( - output b_beat_t beat - ); - cycle_start(); - while (!(ace.b_valid && ace.b_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.b_id = ace.b_id; - beat.b_resp = ace.b_resp; - beat.b_user = ace.b_user; - cycle_end(); - endtask - - /// Monitor the AR channel and return the next beat. - task mon_ar ( - output ax_ace_beat_t beat - ); - cycle_start(); - while (!(ace.ar_valid && ace.ar_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.ax_id = ace.ar_id; - beat.ax_addr = ace.ar_addr; - beat.ax_len = ace.ar_len; - beat.ax_size = ace.ar_size; - beat.ax_burst = ace.ar_burst; - beat.ax_lock = ace.ar_lock; - beat.ax_cache = ace.ar_cache; - beat.ax_prot = ace.ar_prot; - beat.ax_qos = ace.ar_qos; - beat.ax_region = ace.ar_region; - beat.ax_atop = 'X; // Not defined on the AR channel. 
- beat.ax_user = ace.ar_user; - beat.ax_snoop = ace.ar_snoop; - beat.ax_bar = ace.ar_bar; - beat.ax_domain = ace.ar_domain; - cycle_end(); - endtask - - /// Monitor the R channel and return the next beat. - task mon_r ( - output r_ace_beat_t beat - ); - cycle_start(); - while (!(ace.r_valid && ace.r_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.r_id = ace.r_id; - beat.r_data = ace.r_data; - beat.r_resp = ace.r_resp; - beat.r_last = ace.r_last; - beat.r_user = ace.r_user; - cycle_end(); - endtask - - endclass - - class ace_rand_master #( - // AXI interface parameters - parameter int AW = 32, - parameter int DW = 32, - parameter int IW = 8, - parameter int UW = 1, - // Stimuli application and test time - parameter time TA = 0ps, - parameter time TT = 0ps, - // Maximum number of read and write transactions in flight - parameter int MAX_READ_TXNS = 1, - parameter int MAX_WRITE_TXNS = 1, - // Upper and lower bounds on wait cycles on Ax, W, and resp (R and B) channels - parameter int AX_MIN_WAIT_CYCLES = 0, - parameter int AX_MAX_WAIT_CYCLES = 100, - parameter int W_MIN_WAIT_CYCLES = 0, - parameter int W_MAX_WAIT_CYCLES = 5, - parameter int RESP_MIN_WAIT_CYCLES = 0, - parameter int RESP_MAX_WAIT_CYCLES = 20, - // AXI feature usage - parameter int AXI_MAX_BURST_LEN = 0, // maximum number of beats in burst; 0 = AXI max (256) - parameter int TRAFFIC_SHAPING = 0, - parameter bit AXI_EXCLS = 1'b0, - parameter bit AXI_ATOPS = 1'b0, - parameter bit AXI_BURST_FIXED = 1'b1, - parameter bit AXI_BURST_INCR = 1'b1, - parameter bit AXI_BURST_WRAP = 1'b0, - parameter bit UNIQUE_IDS = 1'b0, // guarantee that the ID of each transaction is - // unique among all in-flight transactions in the - // same direction - // Dependent parameters, do not override. 
- parameter int AXI_STRB_WIDTH = DW/8, - parameter int N_AXI_IDS = 2**IW - ); - typedef ace_test::ace_driver #( - .AW(AW), .DW(DW), .IW(IW), .UW(UW), .TA(TA), .TT(TT) - ) ace_driver_t; - typedef logic [AW-1:0] addr_t; - typedef axi_pkg::burst_t burst_t; - typedef axi_pkg::cache_t cache_t; - typedef logic [DW-1:0] data_t; - typedef logic [IW-1:0] id_t; - typedef axi_pkg::len_t len_t; - typedef axi_pkg::size_t size_t; - typedef ace_pkg::arsnoop_t snoop_t; // use only arsnoop_t, which is bigger than awsnoop_t - typedef ace_pkg::axbar_t bar_t; - typedef ace_pkg::axdomain_t domain_t; - typedef ace_pkg::awunique_t awunique_t; - - - typedef logic [UW-1:0] user_t; - typedef axi_pkg::mem_type_t mem_type_t; - - typedef ace_driver_t::ax_ace_beat_t ax_ace_beat_t; - typedef ace_driver_t::b_beat_t b_beat_t; - typedef ace_driver_t::r_ace_beat_t r_ace_beat_t; - typedef ace_driver_t::w_beat_t w_beat_t; - - static addr_t PFN_MASK = '{11: 1'b0, 10: 1'b0, 9: 1'b0, 8: 1'b0, 7: 1'b0, 6: 1'b0, 5: 1'b0, - 4: 1'b0, 3: 1'b0, 2: 1'b0, 1: 1'b0, 0: 1'b0, default: '1}; - - ace_driver_t drv; - - int unsigned r_flight_cnt[N_AXI_IDS-1:0], - w_flight_cnt[N_AXI_IDS-1:0], - tot_r_flight_cnt, - tot_w_flight_cnt; - logic [N_AXI_IDS-1:0] atop_resp_b, - atop_resp_r; - - len_t max_len; - burst_t allowed_bursts[$]; - - semaphore cnt_sem; - - ax_ace_beat_t aw_ace_queue[$], - w_queue[$], - excl_queue[$]; - - typedef struct packed { - addr_t addr_begin; - addr_t addr_end; - mem_type_t mem_type; - } mem_region_t; - mem_region_t mem_map[$]; - - struct packed { - int unsigned len ; - int unsigned cprob; - } traffic_shape[$]; - int unsigned max_cprob; - - function new( - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH(AW), - .AXI_DATA_WIDTH(DW), - .AXI_ID_WIDTH(IW), - .AXI_USER_WIDTH(UW) - ) ace - ); - if (AXI_MAX_BURST_LEN <= 0 || AXI_MAX_BURST_LEN > 256) begin - this.max_len = 255; - end else begin - this.max_len = AXI_MAX_BURST_LEN - 1; - end - this.drv = new(ace); - this.cnt_sem = new(1); - this.reset(); - if 
(AXI_BURST_FIXED) begin - this.allowed_bursts.push_back(BURST_FIXED); - end - if (AXI_BURST_INCR) begin - this.allowed_bursts.push_back(BURST_INCR); - end - if (AXI_BURST_WRAP) begin - this.allowed_bursts.push_back(BURST_WRAP); - end - assert(allowed_bursts.size()) else $fatal(1, "At least one burst type has to be specified!"); - endfunction - - function void reset(); - drv.reset_master(); - r_flight_cnt = '{default: 0}; - w_flight_cnt = '{default: 0}; - tot_r_flight_cnt = 0; - tot_w_flight_cnt = 0; - atop_resp_b = '0; - atop_resp_r = '0; - endfunction - - function void add_memory_region(input addr_t addr_begin, input addr_t addr_end, input mem_type_t mem_type); - mem_map.push_back({addr_begin, addr_end, mem_type}); - endfunction - - function void add_traffic_shaping(input int unsigned len, input int unsigned freq); - if (traffic_shape.size() == 0) - traffic_shape.push_back({len, freq}); - else - traffic_shape.push_back({len, traffic_shape[$].cprob + freq}); - - max_cprob = traffic_shape[$].cprob; - endfunction : add_traffic_shaping - - function ax_ace_beat_t new_rand_burst(input logic is_read); - automatic logic rand_success; - automatic ax_ace_beat_t ax_ace_beat = new; - automatic addr_t addr; - automatic burst_t burst; - automatic cache_t cache; - automatic id_t id; - automatic qos_t qos; - automatic len_t len; - automatic size_t size; - automatic bar_t bar; - automatic domain_t domain; - automatic snoop_t snoop; - automatic awunique_t awunique; - automatic int unsigned mem_region_idx; - automatic mem_region_t mem_region; - automatic int cprob; - ar_snoop_e ar_trs; - aw_snoop_e aw_trs; - // No memory regions defined - if (mem_map.size() == 0) begin - // Return a dummy region - mem_region = '{ - addr_begin: '0, - addr_end: '1, - mem_type: axi_pkg::NORMAL_NONCACHEABLE_BUFFERABLE - }; - end else begin - // Randomly pick a memory region - mem_region_idx = $urandom_range(0,mem_map.size()-1); - // std::randomize(mem_region_idx) with { - // mem_region_idx < 
mem_map.size(); - // }; assert(rand_success); - mem_region = mem_map[mem_region_idx]; - end - - // Randomly pick burst type. - burst = BURST_FIXED; - rand_success = std::randomize(burst) with { - burst inside {this.allowed_bursts}; - }; assert(rand_success); - ax_ace_beat.ax_burst = burst; - // Determine memory type. - ax_ace_beat.ax_cache = is_read ? axi_pkg::get_arcache(mem_region.mem_type) : axi_pkg::get_awcache(mem_region.mem_type); - // Randomize beat size. - if (TRAFFIC_SHAPING) begin - cprob = $urandom_range(0,max_cprob-1); - // rand_success = std::randomize(cprob) with { - // cprob >= 0; cprob < max_cprob; - // }; assert(rand_success); - - for (int i = 0; i < traffic_shape.size(); i++) - if (traffic_shape[i].cprob > cprob) begin - len = traffic_shape[i].len; - if (ax_ace_beat.ax_burst == BURST_WRAP) begin - assert (len inside {len_t'(1), len_t'(3), len_t'(7), len_t'(15)}); - end - break; - end - - // Randomize address. Make sure that the burst does not cross a 4KiB boundary. - forever begin - size = $clog2(AXI_STRB_WIDTH); - // rand_success = std::randomize(size) with { - // 2**size <= AXI_STRB_WIDTH; - // 2**size <= len; - // }; assert(rand_success); - ax_ace_beat.ax_size = size; - ax_ace_beat.ax_len = ((len + (1 << size) - 1) >> size) - 1; - - addr = mem_region.addr_begin; - // rand_success = std::randomize(addr) with { - // addr >= mem_region.addr_begin; - // addr <= mem_region.addr_end; - // addr + len <= mem_region.addr_end; - // }; assert(rand_success); - - if (ax_ace_beat.ax_burst == axi_pkg::BURST_FIXED) begin - if (((addr + 2**ax_ace_beat.ax_size) & PFN_MASK) == (addr & PFN_MASK)) begin - break; - end - end else begin // BURST_INCR - if (((addr + 2**ax_ace_beat.ax_size * (ax_ace_beat.ax_len + 1)) & PFN_MASK) == (addr & PFN_MASK)) begin - break; - end - end - end - end else begin - // Randomize address. Make sure that the burst does not cross a 4KiB boundary. 
- forever begin - // Randomize address - addr = $urandom_range(mem_region.addr_begin, mem_region.addr_end); - - if (ax_ace_beat.ax_burst == axi_pkg::BURST_FIXED) begin - if (((addr + 2**ax_ace_beat.ax_size) & PFN_MASK) == (addr & PFN_MASK)) begin - break; - end - end else begin // BURST_INCR, BURST_WRAP - if (((addr + 2**ax_ace_beat.ax_size * (ax_ace_beat.ax_len + 1)) & PFN_MASK) == (addr & PFN_MASK)) begin - break; - end - end - end - end - - id = $urandom(); - qos = $urandom(); - awunique = 0; - size = $clog2(AXI_STRB_WIDTH); - if (is_read) begin - // Read operation - std::randomize(ar_trs) with { !(ar_trs inside {ar_unsupported_ops}); }; - case( ar_trs ) - AR_READ_NO_SNOOP: begin - snoop = ace_pkg::ReadNoSnoop; - domain = 'b00; - bar = 'b00; - len = $urandom(); - end - AR_READ_ONCE: begin - snoop = ace_pkg::ReadOnce; - domain = 'b01; - bar = 'b00; - len = $urandom_range(0,1); - end - AR_READ_SHARED: begin - snoop = ace_pkg::ReadShared; - domain = 'b01; - bar = 'b00; - len = 1; - end - AR_READ_CLEAN: begin - snoop = ace_pkg::ReadClean; - domain = 'b01; - bar = 'b00; - len = 1; - end - AR_READ_NOT_SHARED_DIRTY: begin - snoop = ace_pkg::ReadNotSharedDirty; - domain = 'b01; - bar = 'b00; - len = 1; - end - AR_READ_UNIQUE: begin - snoop = ace_pkg::ReadUnique; - domain = 'b01; - bar = 'b00; - len = 1; - end - AR_CLEAN_UNIQUE: begin - snoop = ace_pkg::CleanUnique; - domain = 'b01; - bar = 'b00; - len = 1; - end - AR_MAKE_UNIQUE: begin - snoop = ace_pkg::CleanUnique; - domain = 'b01; - bar = 'b00; - len = 1; - end - AR_CLEAN_SHARED: begin - snoop = ace_pkg::CleanShared; - domain = 'b01; - bar = 'b00; - len = 1; - end - AR_CLEAN_INVALID: begin - snoop = ace_pkg::CleanInvalid; - domain = 'b01; - bar = 'b00; - len = 1; - end - AR_MAKE_INVALID: begin - snoop = ace_pkg::MakeInvalid; - domain = 'b01; - bar = 'b00; - len = 1; - end - AR_BARRIER: begin - snoop = ace_pkg::Barrier; - domain = 'b01; - bar = 'b01; - len = 1; - end - AR_DVM_COMPLETE: begin - snoop = 
ace_pkg::DVMComplete; - domain = 'b01; - bar = 'b00; - len = 1; - end - AR_DVM_MESSAGE: begin - snoop = ace_pkg::DVMMessage; - domain = 'b01; - bar = 'b00; - len = 1; - end - default: begin - $error("Invalid snoop op enumeration."); - snoop = 'b0000; - domain = 'b00; - bar = 'b00; - len = $urandom(); - end - endcase - end else begin - // Write operation - std::randomize(aw_trs) with { !(aw_trs inside {aw_unsupported_ops}); }; - case( aw_trs ) - AW_WRITE_NO_SNOOP: begin - snoop = ace_pkg::WriteNoSnoop; - domain = 'b00; - bar = 'b00; - len = $urandom(); - end - AW_WRITE_UNIQUE: begin - snoop = ace_pkg::WriteUnique; - domain = 'b01; - bar = 'b00; - len = 1; - end - AW_WRITE_LINE_UNIQUE: begin - snoop = ace_pkg::WriteLineUnique; - domain = 'b01; - bar = 'b00; - len = 1; - end - AW_WRITE_CLEAN: begin - snoop = ace_pkg::WriteClean; - domain = 'b01; - bar = 'b00; - len = 1; - end - AW_WRITE_BACK: begin - snoop = ace_pkg::WriteBack; - domain = 'b01; - bar = 'b00; - len = 1; - end - AW_EVICT: begin - snoop = ace_pkg::Evict; - domain = 'b01; - bar = 'b00; - len = 1; - end - AW_WRITE_EVICT: begin - snoop = ace_pkg::WriteEvict; - domain = 'b01; - bar = 'b00; - len = 1; - end - AR_MAKE_UNIQUE: begin - snoop = ace_pkg::CleanUnique; - domain = 'b01; - bar = 'b00; - len = 1; - end - AW_BARRIER: begin - snoop = ace_pkg::Barrier; - domain = 'b01; - bar = 'b01; - len = 1; - end - default: begin - $error("Invalid snoop op enumeration."); - snoop = 'b0000; - domain = 'b00; - bar = 'b00; - len = $urandom(); - end - endcase - end - - - ax_ace_beat.ax_addr = addr; - ax_ace_beat.ax_size = size; - ax_ace_beat.ax_len = len; - ax_ace_beat.ax_id = id; - ax_ace_beat.ax_qos = qos; - ax_ace_beat.ax_snoop = snoop; - ax_ace_beat.ax_bar = bar; - ax_ace_beat.ax_domain = domain; - ax_ace_beat.ax_awunique = awunique; - - return ax_ace_beat; - endfunction - - task rand_atop_burst(inout ax_ace_beat_t beat); - automatic logic rand_success; - beat.ax_atop[5:4] = $random(); - if (beat.ax_atop[5:4] != 2'b00 
&& !AXI_BURST_INCR) begin - // We can emit ATOPs only if INCR bursts are allowed. - $warning("ATOP suppressed because INCR bursts are disabled!"); - beat.ax_atop[5:4] = 2'b00; - end - if (beat.ax_atop[5:4] != 2'b00) begin // ATOP - // Determine `ax_atop`. - if (beat.ax_atop[5:4] == axi_pkg::ATOP_ATOMICSTORE || - beat.ax_atop[5:4] == axi_pkg::ATOP_ATOMICLOAD) begin - // Endianness - beat.ax_atop[3] = $random(); - // Atomic operation - beat.ax_atop[2:0] = $random(); - end else begin // Atomic{Swap,Compare} - beat.ax_atop[3:1] = '0; - beat.ax_atop[0] = $random(); - end - // Determine `ax_size` and `ax_len`. - if (2**beat.ax_size < AXI_STRB_WIDTH) begin - // Transaction does *not* occupy full data bus, so we must send just one beat. [E1.1.3] - beat.ax_len = '0; - end else begin - automatic int unsigned bytes; - if (beat.ax_atop == axi_pkg::ATOP_ATOMICCMP) begin - // Total data transferred in burst can be 2, 4, 8, 16, or 32 B. - automatic int unsigned log_bytes; - log_bytes = 3; - // rand_success = std::randomize(log_bytes) with { - // log_bytes > 0; 2**log_bytes <= 32; - // }; assert(rand_success); - bytes = 2**log_bytes; - end else begin - // Total data transferred in burst can be 1, 2, 4, or 8 B. - if (AXI_STRB_WIDTH >= 8) begin - bytes = AXI_STRB_WIDTH; - end else begin - automatic int unsigned log_bytes; - log_bytes = 5; - // rand_success = std::randomize(log_bytes); assert(rand_success); - log_bytes = log_bytes % (4 - $clog2(AXI_STRB_WIDTH)) - $clog2(AXI_STRB_WIDTH); - bytes = 2**log_bytes; - end - end - beat.ax_len = bytes / AXI_STRB_WIDTH - 1; - end - // Determine `ax_addr` and `ax_burst`. - if (beat.ax_atop == axi_pkg::ATOP_ATOMICCMP) begin - // The address must be aligned to half the outbound data size. [E1.1.3] - beat.ax_addr = beat.ax_addr & ~((1'b1 << beat.ax_size) - 1); - // If the address is aligned to the total size of outgoing data, the burst type must be - // INCR. Otherwise, it must be WRAP. 
[E1.1.3] - beat.ax_burst = (beat.ax_addr % ((beat.ax_len+1) * 2**beat.ax_size) == 0) ? - axi_pkg::BURST_INCR : axi_pkg::BURST_WRAP; - // If we are not allowed to emit WRAP bursts, align the address to the total size of - // outgoing data and fall back to INCR. - if (beat.ax_burst == axi_pkg::BURST_WRAP && !AXI_BURST_WRAP) begin - beat.ax_addr -= (beat.ax_addr % ((beat.ax_len+1) * 2**beat.ax_size)); - beat.ax_burst = axi_pkg::BURST_INCR; - end - end else begin - // The address must be aligned to the data size. [E1.1.3] - beat.ax_addr = beat.ax_addr & ~((1'b1 << (beat.ax_size+1)) - 1); - // Only INCR allowed. - beat.ax_burst = axi_pkg::BURST_INCR; - end - end - endtask - - function void rand_excl_ar(inout ax_ace_beat_t ar_ace_beat); - ar_ace_beat.ax_lock = $random(); - if (ar_ace_beat.ax_lock) begin - automatic logic rand_success; - automatic int unsigned n_bytes; - automatic size_t size; - automatic addr_t addr_mask; - ar_ace_beat.ax_size = $clog2(AXI_STRB_WIDTH); - - // The address must be aligned to the total number of bytes in the burst. - ar_ace_beat.ax_addr = ar_ace_beat.ax_addr & ~(2); - ar_ace_beat.ax_snoop = $urandom(); - if( ar_ace_beat.ax_snoop == 4'b1001 || ar_ace_beat.ax_snoop == 4'b1011) begin - ar_ace_beat.ax_len = 0; - end else begin - ar_ace_beat.ax_len = 1; - end - ar_ace_beat.ax_bar = $urandom(); - ar_ace_beat.ax_domain = $urandom(); - - end - endfunction - - // TODO: The `rand_wait` task exists in `rand_verif_pkg`, but that task cannot be called with - // `this.drv.ace.clk_i` as `clk` argument. What is the syntax for getting an assignable - // reference? 
- task automatic rand_wait(input int unsigned min, max); - int unsigned rand_success, cycles; - cycles = $urandom_range(min,max); - // rand_success = std::randomize(cycles) with { - // cycles >= min; - // cycles <= max; - // }; - //assert (rand_success) else $error("Failed to randomize wait cycles!"); - repeat (cycles) @(posedge this.drv.ace.clk_i); - endtask - - // Determine if the ID of an AXI Ax beat is currently legal. This function may only be called - // while holding the `cnt_sem` semaphore. - function bit id_is_legal(input bit is_read, input ax_ace_beat_t beat); - if (AXI_ATOPS) begin - // The ID must not be the same as that of any in-flight ATOP. - if (atop_resp_b[beat.ax_id] || atop_resp_r[beat.ax_id]) return 1'b0; - // If this beat starts an ATOP, its ID must not be the same as that of any other in-flight - // AXI transaction. - if (!is_read && beat.ax_atop[5:4] != 2'b00 && ( - r_flight_cnt[beat.ax_id] != 0 || w_flight_cnt[beat.ax_id] !=0 - )) return 1'b0; - end - if (UNIQUE_IDS) begin - // This master may only emit transactions with an ID that is unique among all in-flight - // transactions in the same direction. - if (is_read && r_flight_cnt[beat.ax_id] != 0) return 1'b0; - if (!is_read && w_flight_cnt[beat.ax_id] != 0) return 1'b0; - end - // There is no reason why this ID would be illegal, so it is legal. - return 1'b1; - endfunction - - // Legalize the ID of an AXI Ax beat (drawing a new ID at random if the existing ID is currently - // not legal) and add it to the in-flight transactions. - task legalize_id(input bit is_read, inout ax_ace_beat_t beat); - automatic logic rand_success; - automatic id_t id = beat.ax_id; - // Loop until a legal ID is found. - forever begin - // Acquire semaphore on in-flight counters. - cnt_sem.get(); - // Exit loop if the current ID is legal. 
- if (id_is_legal(is_read, beat)) begin - break; - end else begin - // The current ID is currently not legal, so try another ID in the next cycle and - // release the semaphore until then. - cnt_sem.put(); - rand_wait(1, 1); - if (!beat.ax_lock) begin // The ID of an exclusive transfer must not be changed. - //rand_success = std::randomize(id); assert(rand_success); - id = 1; - beat.ax_id = id; - end - end - end - // Mark transfer for decided ID as in flight. - if (!is_read) begin - w_flight_cnt[beat.ax_id]++; - tot_w_flight_cnt++; - if (beat.ax_atop != 2'b00) begin - // This is an ATOP, so it gives rise to a write response. - atop_resp_b[beat.ax_id] = 1'b1; - if (beat.ax_atop[axi_pkg::ATOP_R_RESP]) begin - // This ATOP type additionally gives rise to a read response. - atop_resp_r[beat.ax_id] = 1'b1; - end - end - end else begin - r_flight_cnt[beat.ax_id]++; - tot_r_flight_cnt++; - end - // Release semaphore on in-flight counters. - cnt_sem.put(); - endtask - - task send_ars(input int n_reads); - automatic logic rand_success; - repeat (n_reads) begin - automatic id_t id; - automatic ax_ace_beat_t ar_ace_beat = new_rand_burst(1'b1); - while (tot_r_flight_cnt >= MAX_READ_TXNS) begin - rand_wait(1, 1); - end - if (AXI_EXCLS) begin - rand_excl_ar(ar_ace_beat); - end - legalize_id(1'b1, ar_ace_beat); - rand_wait(AX_MIN_WAIT_CYCLES, AX_MAX_WAIT_CYCLES); - drv.send_ar(ar_ace_beat); - if (ar_ace_beat.ax_lock) excl_queue.push_back(ar_ace_beat); - end - $info("Finish ARs"); - endtask - - task recv_rs(ref logic ar_done, aw_done); - while (!(ar_done && tot_r_flight_cnt == 0 && - (!AXI_ATOPS || (AXI_ATOPS && aw_done && atop_resp_r == '0)) - )) begin - automatic r_ace_beat_t r_ace_beat; - rand_wait(RESP_MIN_WAIT_CYCLES, RESP_MAX_WAIT_CYCLES); - if (tot_r_flight_cnt > 0 || atop_resp_r > 0) begin - drv.recv_r(r_ace_beat); - if (r_ace_beat.r_last) begin - cnt_sem.get(); - if (atop_resp_r[r_ace_beat.r_id]) begin - atop_resp_r[r_ace_beat.r_id] = 1'b0; - end else begin - 
r_flight_cnt[r_ace_beat.r_id]--; - tot_r_flight_cnt--; - end - cnt_sem.put(); - end - end - end - $info("Finish Rs"); - endtask - - task create_aws(input int n_writes); - automatic logic rand_success; - repeat (n_writes) begin - automatic bit excl = 1'b0; - automatic ax_ace_beat_t aw_ace_beat; - if (AXI_EXCLS && excl_queue.size() > 0) excl = $random(); - if (excl) begin - aw_ace_beat = excl_queue.pop_front(); - end else begin - aw_ace_beat = new_rand_burst(1'b0); - if (AXI_ATOPS) rand_atop_burst(aw_ace_beat); - end - while (tot_w_flight_cnt >= MAX_WRITE_TXNS) begin - rand_wait(1, 1); - end - legalize_id(1'b0, aw_ace_beat); - aw_ace_queue.push_back(aw_ace_beat); - w_queue.push_back(aw_ace_beat); - end - $info("Finish AWs"); - endtask - - task send_aws(ref logic aw_done); - while (!(aw_done && aw_ace_queue.size() == 0)) begin - automatic ax_ace_beat_t aw_ace_beat; - wait (aw_ace_queue.size() > 0 || (aw_done && aw_ace_queue.size() == 0)); - aw_ace_beat = aw_ace_queue.pop_front(); - rand_wait(AX_MIN_WAIT_CYCLES, AX_MAX_WAIT_CYCLES); - drv.send_aw(aw_ace_beat); - end - endtask - - task send_ws(ref logic aw_done); - while (!(aw_done && w_queue.size() == 0)) begin - automatic ax_ace_beat_t aw_ace_beat; - automatic addr_t addr; - static logic rand_success; - wait (w_queue.size() > 0 || (aw_done && w_queue.size() == 0)); - aw_ace_beat = w_queue.pop_front(); - for (int unsigned i = 0; i < aw_ace_beat.ax_len + 1; i++) begin - automatic w_beat_t w_beat = new; - automatic int unsigned begin_byte, end_byte, n_bytes; - automatic logic [AXI_STRB_WIDTH-1:0] rand_strb, strb_mask; - addr = axi_pkg::beat_addr(aw_ace_beat.ax_addr, aw_ace_beat.ax_size, aw_ace_beat.ax_len, - aw_ace_beat.ax_burst, i); - //rand_success = w_beat.randomize(); assert (rand_success); - // Determine strobe. 
- w_beat.w_strb = '0; - n_bytes = 2**aw_ace_beat.ax_size; - begin_byte = addr % AXI_STRB_WIDTH; - end_byte = ((begin_byte + n_bytes) >> aw_ace_beat.ax_size) << aw_ace_beat.ax_size; - strb_mask = '0; - for (int unsigned b = begin_byte; b < end_byte; b++) - strb_mask[b] = 1'b1; - rand_strb = $urandom(); - //rand_success = std::randomize(rand_strb); assert (rand_success); - w_beat.w_strb |= (rand_strb & strb_mask); - // Determine last. - w_beat.w_last = (i == aw_ace_beat.ax_len); - rand_wait(W_MIN_WAIT_CYCLES, W_MAX_WAIT_CYCLES); - drv.send_w(w_beat); - end - end - $info("Finish Ws"); - endtask - - task recv_bs(ref logic aw_done); - while (!(aw_done && tot_w_flight_cnt == 0)) begin - automatic b_beat_t b_beat; - rand_wait(RESP_MIN_WAIT_CYCLES, RESP_MAX_WAIT_CYCLES); - drv.recv_b(b_beat); - cnt_sem.get(); - if (atop_resp_b[b_beat.b_id]) begin - atop_resp_b[b_beat.b_id] = 1'b0; - end - w_flight_cnt[b_beat.b_id]--; - tot_w_flight_cnt--; - cnt_sem.put(); - end - $info("Finish Bs"); - endtask - - // Issue n_reads random read and n_writes random write transactions to an address range. 
- task run(input int n_reads, input int n_writes); - automatic logic ar_done = 1'b0, - aw_done = 1'b0; - fork - begin - send_ars(n_reads); - ar_done = 1'b1; - end - recv_rs(ar_done, aw_done); - begin - create_aws(n_writes); - aw_done = 1'b1; - end - send_aws(aw_done); - send_ws(aw_done); - recv_bs(aw_done); - join - endtask - - endclass - - class ace_rand_slave #( - // AXI interface parameters - parameter int AW = 32, - parameter int DW = 32, - parameter int IW = 8, - parameter int UW = 1, - // Stimuli application and test time - parameter time TA = 0ps, - parameter time TT = 0ps, - parameter bit RAND_RESP = 0, - // Upper and lower bounds on wait cycles on Ax, W, and resp (R and B) channels - parameter int AX_MIN_WAIT_CYCLES = 0, - parameter int AX_MAX_WAIT_CYCLES = 100, - parameter int R_MIN_WAIT_CYCLES = 0, - parameter int R_MAX_WAIT_CYCLES = 5, - parameter int RESP_MIN_WAIT_CYCLES = 0, - parameter int RESP_MAX_WAIT_CYCLES = 20, - /// This parameter eneables an internal memory, which gets randomly initialized, if it is read - /// and retains written data. This mode does currently not support `axi_pkg::BURST_WRAP`! - /// All responses are `axi_pkg::RESP_OKAY` when in this mode. - parameter bit MAPPED = 1'b0 - ); - typedef ace_test::ace_driver #( - .AW(AW), .DW(DW), .IW(IW), .UW(UW), .TA(TA), .TT(TT) - ) ace_driver_t; - typedef rand_id_queue_pkg::rand_id_queue #( - .data_t (ace_driver_t::ax_ace_beat_t), - .ID_WIDTH (IW) - ) rand_ax_ace_beat_queue_t; - typedef ace_driver_t::ax_ace_beat_t ax_ace_beat_t; - typedef ace_driver_t::b_beat_t b_beat_t; - typedef ace_driver_t::r_ace_beat_t r_ace_beat_t; - typedef ace_driver_t::w_beat_t w_beat_t; - - typedef logic [AW-1:0] addr_t; - typedef logic [7:0] byte_t; - - ace_driver_t drv; - rand_ax_ace_beat_queue_t ar_ace_queue; - ax_ace_beat_t aw_ace_queue[$]; - int unsigned b_wait_cnt; - - // Memory array for when the `MAPPED` parameter is set. 
- byte_t memory_q[addr_t]; - - function new( - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH(AW), - .AXI_DATA_WIDTH(DW), - .AXI_ID_WIDTH(IW), - .AXI_USER_WIDTH(UW) - ) ace - ); - this.drv = new(ace); - this.ar_ace_queue = new; - this.b_wait_cnt = 0; - this.reset(); - endfunction - - function void reset(); - this.drv.reset_slave(); - this.memory_q.delete(); - endfunction - - // TODO: The `rand_wait` task exists in `rand_verif_pkg`, but that task cannot be called with - // `this.drv.ace.clk_i` as `clk` argument. What is the syntax getting an assignable reference? - task automatic rand_wait(input int unsigned min, max); - int unsigned rand_success, cycles; - cycles = $urandom_range(min,max); - // rand_success = std::randomize(cycles) with { - // cycles >= min; - // cycles <= max; - // }; - // assert (rand_success) else $error("Failed to randomize wait cycles!"); - repeat (cycles) @(posedge this.drv.ace.clk_i); - endtask - - task recv_ars(); - forever begin - automatic ax_ace_beat_t ar_ace_beat; - rand_wait(AX_MIN_WAIT_CYCLES, AX_MAX_WAIT_CYCLES); - drv.recv_ar(ar_ace_beat); - if (MAPPED) begin - assert (ar_ace_beat.ax_burst != axi_pkg::BURST_WRAP) else - $error("axi_pkg::BURST_WRAP not supported in MAPPED mode."); - end - ar_ace_queue.push(ar_ace_beat.ax_id, ar_ace_beat); - end - endtask - - task send_rs(); - forever begin - automatic logic rand_success; - automatic ax_ace_beat_t ar_ace_beat; - automatic r_ace_beat_t r_ace_beat = new; - automatic addr_t byte_addr; - wait (ar_ace_queue.size > 0); - ar_ace_beat = ar_ace_queue.peek(); - byte_addr = axi_pkg::aligned_addr(ar_ace_beat.ax_addr, axi_pkg::size_t'($clog2(DW/8))); - //rand_success = std::randomize(r_beat); assert(rand_success); - //rand_success = r_beat.randomize(); assert(rand_success); - if (MAPPED) begin - // Either use the actual data, or save the random generated. 
- for (int unsigned i = 0; i < (DW/8); i++) begin - if (this.memory_q.exists(byte_addr)) begin - r_ace_beat.r_data[i*8+:8] = this.memory_q[byte_addr]; - end else begin - this.memory_q[byte_addr] = r_ace_beat.r_data[i*8+:8]; - end - byte_addr++; - end - r_ace_beat.r_resp = axi_pkg::RESP_OKAY; - end - r_ace_beat.r_id = ar_ace_beat.ax_id; - if (RAND_RESP && !ar_ace_beat.ax_atop[axi_pkg::ATOP_R_RESP]) - r_ace_beat.r_resp[1] = $random(); - if (ar_ace_beat.ax_lock) - r_ace_beat.r_resp[0]= $random(); - r_ace_beat.r_resp[2] = $random(); - r_ace_beat.r_resp[3] = $random(); - rand_wait(R_MIN_WAIT_CYCLES, R_MAX_WAIT_CYCLES); - if (ar_ace_beat.ax_len == '0) begin - r_ace_beat.r_last = 1'b1; - void'(ar_ace_queue.pop_id(ar_ace_beat.ax_id)); - end else begin - if ((ar_ace_beat.ax_burst == axi_pkg::BURST_INCR) && MAPPED) begin - ar_ace_beat.ax_addr = axi_pkg::aligned_addr(ar_ace_beat.ax_addr, ar_ace_beat.ax_size) + - 2**ar_ace_beat.ax_size; - end - ar_ace_beat.ax_len--; - ar_ace_queue.set(ar_ace_beat.ax_id, ar_ace_beat); - end - drv.send_r(r_ace_beat); - end - endtask - - task recv_aws(); - forever begin - automatic ax_ace_beat_t aw_ace_beat; - rand_wait(AX_MIN_WAIT_CYCLES, AX_MAX_WAIT_CYCLES); - drv.recv_aw(aw_ace_beat); - if (MAPPED) begin - assert (aw_ace_beat.ax_atop == '0) else - $error("ATOP not supported in MAPPED mode."); - assert (aw_ace_beat.ax_burst != axi_pkg::BURST_WRAP) else - $error("axi_pkg::BURST_WRAP not supported in MAPPED mode."); - end - aw_ace_queue.push_back(aw_ace_beat); - // Atomic{Load,Swap,Compare}s require an R response. 
- if (aw_ace_beat.ax_atop[axi_pkg::ATOP_R_RESP]) begin - ar_ace_queue.push(aw_ace_beat.ax_id, aw_ace_beat); - end - end - endtask - - task recv_ws(); - forever begin - automatic ax_ace_beat_t aw_ace_beat; - automatic addr_t byte_addr; - forever begin - automatic w_beat_t w_beat; - rand_wait(RESP_MIN_WAIT_CYCLES, RESP_MAX_WAIT_CYCLES); - drv.recv_w(w_beat); - if (MAPPED) begin - wait (aw_ace_queue.size() > 0); - aw_ace_beat = aw_ace_queue[0]; - byte_addr = axi_pkg::aligned_addr(aw_ace_beat.ax_addr, $clog2(DW/8)); - - // Write Data if the strobe is defined - for (int unsigned i = 0; i < (DW/8); i++) begin - if (w_beat.w_strb[i]) begin - this.memory_q[byte_addr] = w_beat.w_data[i*8+:8]; - end - byte_addr++; - end - // Update address in beat - if (aw_ace_beat.ax_burst == axi_pkg::BURST_INCR) begin - aw_ace_beat.ax_addr = axi_pkg::aligned_addr(aw_ace_beat.ax_addr, aw_ace_beat.ax_size) + - 2**aw_ace_beat.ax_size; - end - aw_ace_queue[0] = aw_ace_beat; - end - if (w_beat.w_last) - break; - end - b_wait_cnt++; - end - endtask - - task send_bs(); - forever begin - automatic ax_ace_beat_t aw_ace_beat; - automatic b_beat_t b_beat = new; - automatic logic rand_success; - wait (b_wait_cnt > 0 && (aw_ace_queue.size() != 0)); - aw_ace_beat = aw_ace_queue.pop_front(); - //rand_success = b_beat.randomize(); assert(rand_success); - b_beat.b_id = aw_ace_beat.ax_id; - if (RAND_RESP && !aw_ace_beat.ax_atop[axi_pkg::ATOP_R_RESP]) - b_beat.b_resp[1] = $random(); - if (aw_ace_beat.ax_lock) begin - b_beat.b_resp[0]= $random(); - end - rand_wait(RESP_MIN_WAIT_CYCLES, RESP_MAX_WAIT_CYCLES); - if (MAPPED) begin - b_beat.b_resp = axi_pkg::RESP_OKAY; - end - drv.send_b(b_beat); - b_wait_cnt--; - end - endtask - - task run(); - fork - recv_ars(); - send_rs(); - recv_aws(); - recv_ws(); - send_bs(); - join - endtask - - endclass - - /// ACE Monitor. 
- class ace_monitor #( - /// AXI4+ATOP ID width - parameter int unsigned IW = 0, - /// AXI4+ATOP address width - parameter int unsigned AW = 0, - /// AXI4+ATOP data width - parameter int unsigned DW = 0, - /// AXI4+ATOP user width - parameter int unsigned UW = 0, - /// Stimuli test time - parameter time TT = 0ns - ); - - typedef ace_test::ace_driver #( - .AW(AW), .DW(DW), .IW(IW), .UW(UW), .TA(TT), .TT(TT) - ) ace_driver_t; - - typedef ace_driver_t::ax_ace_beat_t ax_ace_beat_t; - typedef ace_driver_t::w_beat_t w_beat_t; - typedef ace_driver_t::b_beat_t b_beat_t; - typedef ace_driver_t::r_ace_beat_t r_ace_beat_t; - - ace_driver_t drv; - mailbox aw_mbx = new, w_mbx = new, b_mbx = new, - ar_mbx = new, r_mbx = new; - - function new( - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH(AW), - .AXI_DATA_WIDTH(DW), - .AXI_ID_WIDTH(IW), - .AXI_USER_WIDTH(UW) - ) axi - ); - this.drv = new(axi); - endfunction - - task monitor; - fork - // AW - forever begin - automatic ax_ace_beat_t ax; - this.drv.mon_aw(ax); - aw_mbx.put(ax); - end - // W - forever begin - automatic w_beat_t w; - this.drv.mon_w(w); - w_mbx.put(w); - end - // B - forever begin - automatic b_beat_t b; - this.drv.mon_b(b); - b_mbx.put(b); - end - // AR - forever begin - automatic ax_ace_beat_t ax; - this.drv.mon_ar(ax); - ar_mbx.put(ax); - end - // R - forever begin - automatic r_ace_beat_t r; - this.drv.mon_r(r); - r_mbx.put(r); - end - join - endtask - endclass - -endpackage - -// non synthesisable axi logger module -// this module logs the activity of the input axi channel -// the log files will be found in "./axi_log//" -// one log file for all writes -// a log file per id for the reads -// atomic transactions with read response are injected into the corresponding log file of the read -module ace_chan_logger #( - parameter time TestTime = 8ns, // Time after clock, where sampling happens - parameter string LoggerName = "ace_logger", // name of the logger - parameter type aw_chan_t = logic, // axi AW type - parameter 
type w_chan_t = logic, // axi W type - parameter type b_chan_t = logic, // axi B type - parameter type ar_chan_t = logic, // axi AR type - parameter type r_chan_t = logic // axi R type -) ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low, when `1'b0` no sampling - input logic end_sim_i, // end of simulation - // AW channel - input aw_chan_t aw_chan_i, - input logic aw_valid_i, - input logic aw_ready_i, - // W channel - input w_chan_t w_chan_i, - input logic w_valid_i, - input logic w_ready_i, - // B channel - input b_chan_t b_chan_i, - input logic b_valid_i, - input logic b_ready_i, - // AR channel - input ar_chan_t ar_chan_i, - input logic ar_valid_i, - input logic ar_ready_i, - // R channel - input r_chan_t r_chan_i, - input logic r_valid_i, - input logic r_ready_i -); - // id width from channel - localparam int unsigned IdWidth = $bits(aw_chan_i.id); - localparam int unsigned NoIds = 2**IdWidth; - - // queues for writes and reads - aw_chan_t aw_queue[$]; - w_chan_t w_queue[$]; - b_chan_t b_queue[$]; - aw_chan_t ar_queues[NoIds-1:0][$]; - r_chan_t r_queues[NoIds-1:0][$]; - - // channel sampling into queues - always @(posedge clk_i) #TestTime begin : proc_channel_sample - automatic aw_chan_t ar_beat; - automatic int fd; - automatic string log_file; - automatic string log_str; - // only execute when reset is high - if (rst_ni) begin - // AW channel - if (aw_valid_i && aw_ready_i) begin - aw_queue.push_back(aw_chan_i); - log_file = $sformatf("./axi_log/%s/write.log", LoggerName); - fd = $fopen(log_file, "a"); - if (fd) begin - log_str = $sformatf("%0t> ID: %h AW on channel: LEN: %d, ATOP: %b", - $time, aw_chan_i.id, aw_chan_i.len, aw_chan_i.atop); - $fdisplay(fd, log_str); - $fclose(fd); - end - - // inject AR into queue, if there is an atomic - if (aw_chan_i.atop[axi_pkg::ATOP_R_RESP]) begin - $display("Atomic detected with response"); - ar_beat.id = aw_chan_i.id; - ar_beat.addr = aw_chan_i.addr; - if (aw_chan_i.len > 1) begin - 
ar_beat.len = aw_chan_i.len / 2; - end else begin - ar_beat.len = aw_chan_i.len; - end - ar_beat.size = aw_chan_i.size; - ar_beat.burst = aw_chan_i.burst; - ar_beat.lock = aw_chan_i.lock; - ar_beat.cache = aw_chan_i.cache; - ar_beat.prot = aw_chan_i.prot; - ar_beat.qos = aw_chan_i.qos; - ar_beat.region = aw_chan_i.region; - ar_beat.atop = aw_chan_i.atop; - ar_beat.user = aw_chan_i.user; - ar_queues[aw_chan_i.id].push_back(ar_beat); - log_file = $sformatf("./axi_log/%s/read_%0h.log", LoggerName, aw_chan_i.id); - fd = $fopen(log_file, "a"); - if (fd) begin - log_str = $sformatf("%0t> ID: %h AR on channel: LEN: %d injected ATOP: %b", - $time, ar_beat.id, ar_beat.len, ar_beat.atop); - $fdisplay(fd, log_str); - $fclose(fd); - end - end - end - // W channel - if (w_valid_i && w_ready_i) begin - w_queue.push_back(w_chan_i); - end - // B channel - if (b_valid_i && b_ready_i) begin - b_queue.push_back(b_chan_i); - end - // AR channel - if (ar_valid_i && ar_ready_i) begin - log_file = $sformatf("./axi_log/%s/read_%0h.log", LoggerName, ar_chan_i.id); - fd = $fopen(log_file, "a"); - if (fd) begin - log_str = $sformatf("%0t> ID: %h AR on channel: LEN: %d", - $time, ar_chan_i.id, ar_chan_i.len); - $fdisplay(fd, log_str); - $fclose(fd); - end - ar_beat.id = ar_chan_i.id; - ar_beat.addr = ar_chan_i.addr; - ar_beat.len = ar_chan_i.len; - ar_beat.size = ar_chan_i.size; - ar_beat.burst = ar_chan_i.burst; - ar_beat.lock = ar_chan_i.lock; - ar_beat.cache = ar_chan_i.cache; - ar_beat.prot = ar_chan_i.prot; - ar_beat.qos = ar_chan_i.qos; - ar_beat.region = ar_chan_i.region; - ar_beat.atop = '0; - ar_beat.user = ar_chan_i.user; - ar_beat.snoop=ar_chan_i.snoop; - ar_beat.bar=ar_chan_i.bar; - ar_beat.domain=ar_chan_i.domain; - ar_queues[ar_chan_i.id].push_back(ar_beat); - end - // R channel - if (r_valid_i && r_ready_i) begin - r_queues[r_chan_i.id].push_back(r_chan_i); - end - end - end - - initial begin : proc_log - automatic string log_name; - automatic string log_string; - automatic 
aw_chan_t aw_beat; - automatic w_chan_t w_beat; - automatic int unsigned no_w_beat = 0; - automatic b_chan_t b_beat; - automatic aw_chan_t ar_beat; - automatic r_chan_t r_beat; - automatic int unsigned no_r_beat[NoIds]; - automatic int fd; - - // init r counter - for (int unsigned i = 0; i < NoIds; i++) begin - no_r_beat[i] = 0; - end - - // make the log dirs - log_name = $sformatf("mkdir -p ./axi_log/%s/", LoggerName); - $system(log_name); - - // open log files - log_name = $sformatf("./axi_log/%s/write.log", LoggerName); - fd = $fopen(log_name, "w"); - if (fd) begin - $display("File was opened successfully : %s", log_name); - $fdisplay(fd, "This is the write log file"); - $fclose(fd); - end else - $display("File was NOT opened successfully : %s", log_name); - for (int unsigned i = 0; i < NoIds; i++) begin - log_name = $sformatf("./axi_log/%s/read_%0h.log", LoggerName, i); - fd = $fopen(log_name, "w"); - if (fd) begin - $display("File was opened successfully : %s", log_name); - $fdisplay(fd, "This is the read log file for ID: %0h", i); - $fclose(fd); - end else - $display("File was NOT opened successfully : %s", log_name); - end - - // on each clock cycle update the logs if there is something in the queues - wait (rst_ni); - while (!end_sim_i) begin - @(posedge clk_i); - - // update the write log file - while (aw_queue.size() != 0 && w_queue.size() != 0) begin - aw_beat = aw_queue[0]; - w_beat = w_queue.pop_front(); - - log_string = $sformatf("%0t> ID: %h W %d of %d, LAST: %b ATOP: %b", - $time, aw_beat.id, no_w_beat, aw_beat.len, w_beat.last, aw_beat.atop); - - log_name = $sformatf("./axi_log/%s/write.log", LoggerName); - fd = $fopen(log_name, "a"); - if (fd) begin - $fdisplay(fd, log_string); - // write out error if last beat does not match! 
- if (w_beat.last && !(aw_beat.len == no_w_beat)) begin - $fdisplay(fd, "ERROR> Last flag was not expected!!!!!!!!!!!!!"); - end - $fclose(fd); - end - // pop the AW if the last flag is set - no_w_beat++; - if (w_beat.last) begin - aw_beat = aw_queue.pop_front(); - no_w_beat = 0; - end - end - - // check b queue - if (b_queue.size() != 0) begin - b_beat = b_queue.pop_front(); - log_string = $sformatf("%0t> ID: %h B recieved", - $time, b_beat.id); - log_name = $sformatf("./axi_log/%s/write.log", LoggerName); - fd = $fopen(log_name, "a"); - if (fd) begin - $fdisplay(fd, log_string); - $fclose(fd); - end - end - - // update the read log files - for (int unsigned i = 0; i < NoIds; i++) begin - while (ar_queues[i].size() != 0 && r_queues[i].size() != 0) begin - ar_beat = ar_queues[i][0]; - r_beat = r_queues[i].pop_front(); - - log_name = $sformatf("./axi_log/%s/read_%0h.log", LoggerName, i); - fd = $fopen(log_name, "a"); - if (fd) begin - log_string = $sformatf("%0t> ID: %h R %d of %d, LAST: %b ATOP: %b", - $time, r_beat.id, no_r_beat[i], ar_beat.len, r_beat.last, ar_beat.atop); - - $fdisplay(fd, log_string); - // write out error if last beat does not match! - if (r_beat.last && !(ar_beat.len == no_r_beat[i])) begin - $fdisplay(fd, "ERROR> Last flag was not expected!!!!!!!!!!!!!"); - end - $fclose(fd); - end - no_r_beat[i]++; - // pop the queue if it is the last flag - if (r_beat.last) begin - ar_beat = ar_queues[i].pop_front(); - no_r_beat[i] = 0; - end - end - end - end - $fclose(fd); - end -endmodule diff --git a/src/deprecated/snoop_test.sv b/src/deprecated/snoop_test.sv deleted file mode 100644 index f59cd7d..0000000 --- a/src/deprecated/snoop_test.sv +++ /dev/null @@ -1,705 +0,0 @@ -// Copyright (c) 2014-2018 ETH Zurich, University of Bologna -// Copyright (c) 2022 PlanV GmbH -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. 
You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// - - -/// A set of testbench utilities for AXI interfaces. -package snoop_test; - - import axi_pkg::*; - import ace_pkg::*; - - typedef enum logic [3:0] { - AC_READ_ONCE = 0, - AC_READ_SHARED = 1, - AC_READ_CLEAN = 2, - AC_READ_NOT_SHARED_DIRTY = 3, - AC_READ_UNIQUE = 4, - AC_CLEAN_SHARED = 5, - AC_CLEAN_INVALID = 6, - AC_MAKE_INVALID = 7, - AC_DVM_COMPLETE = 8, - AC_DVM_MESSAGE = 9 - } ac_snoop_e; - - /// The data transferred on a beat on the AC channel. - class ace_ac_beat #( - parameter AW = 32 - ); - rand logic [AW-1:0] ac_addr = '0; - logic [3:0] ac_snoop = '0; - logic [2:0] ac_prot = '0; - endclass - - /// The data transferred on a beat on the CR channel. - class ace_cr_beat; - ace_pkg::crresp_t cr_resp = '0; - endclass - - /// The data transferred on a beat on the CD channel. 
- class ace_cd_beat #( - parameter DW = 32 - ); - rand logic [DW-1:0] cd_data = '0; - logic cd_last; - endclass - - class snoop_driver #( - parameter AW = 32, - parameter DW = 32, - parameter time TA = 0ns , // stimuli application time - parameter time TT = 0ns // stimuli test time - ); - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH(AW), - .SNOOP_DATA_WIDTH(DW) - ) snoop; - - typedef ace_ac_beat #(.AW(AW)) ace_ac_beat_t; - typedef ace_cd_beat #(.DW(DW)) ace_cd_beat_t; - typedef ace_cr_beat ace_cr_beat_t; - - function new( - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH(AW), - .SNOOP_DATA_WIDTH(DW) - ) snoop - ); - this.snoop = snoop; - endfunction - - function void reset_master(); - snoop.ac_valid <= '0; - snoop.ac_addr <= '0; - snoop.ac_snoop <= '0; - snoop.ac_prot <= '0; - snoop.cr_ready <= '0; - snoop.cd_ready <= '0; - endfunction - - function void reset_slave(); - snoop.ac_ready <= '0; - snoop.cr_valid <= '0; - snoop.cr_resp <= '0; - snoop.cd_valid <= '0; - snoop.cd_data <= '0; - snoop.cd_last <= '0; - endfunction - - task cycle_start; - #TT; - endtask - - task cycle_end; - @(posedge snoop.clk_i); - endtask - - /// Issue a beat on the AC channel. - task send_ac ( - input ace_ac_beat_t beat - ); - snoop.ac_valid <= #TA 1; - snoop.ac_addr <= #TA beat.ac_addr; - snoop.ac_snoop <= #TA beat.ac_snoop; - snoop.ac_prot <= #TA beat.ac_prot; - cycle_start(); - while (snoop.ac_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - snoop.ac_valid <= #TA '0; - snoop.ac_addr <= #TA '0; - snoop.ac_snoop <= #TA '0; - snoop.ac_prot <= #TA '0; - endtask - - /// Issue a beat on the CR channel. - task send_cr ( - input ace_cr_beat_t beat - ); - snoop.cr_valid <= #TA 1; - snoop.cr_resp <= #TA beat.cr_resp; - cycle_start(); - while (snoop.cr_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - snoop.cr_valid <= #TA '0; - snoop.cr_resp <= #TA '0; - endtask - - /// Issue a beat on the CD channel. 
- task send_cd ( - input ace_cd_beat_t beat - ); - snoop.cd_valid <= #TA 1; - snoop.cd_data <= #TA beat.cd_data; - snoop.cd_last <= #TA beat.cd_last; - cycle_start(); - while (snoop.cd_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - snoop.cd_valid <= #TA '0; - snoop.cd_data <= #TA '0; - snoop.cd_last <= #TA '0; - endtask - - /// Wait for a beat on the AC channel. - task recv_ac ( - output ace_ac_beat_t beat - ); - snoop.ac_ready <= #TA 1; - cycle_start(); - while (snoop.ac_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.ac_addr = snoop.ac_addr; - beat.ac_snoop = snoop.ac_snoop; - beat.ac_prot = snoop.ac_prot; - cycle_end(); - snoop.ac_ready <= #TA 0; - endtask - - /// Wait for a beat on the CR channel. - task recv_cr ( - output ace_cr_beat_t beat - ); - snoop.cr_ready <= #TA 1; - cycle_start(); - while (snoop.cr_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.cr_resp = snoop.cr_resp; - cycle_end(); - snoop.cr_ready <= #TA 0; - endtask - - /// Wait for a beat on the CD channel. - task recv_cd ( - output ace_cd_beat_t beat - ); - beat = new; - beat.cd_last = '0; - while (!beat.cd_last) begin - snoop.cd_ready <= #TA 1; - cycle_start(); - while (snoop.cd_valid != 1) begin cycle_end(); cycle_start(); end - beat.cd_data = snoop.cd_data; - beat.cd_last = snoop.cd_last; - cycle_end(); - snoop.cd_ready <= #TA 0; - end - endtask - - /// Monitor the AC channel and return the next beat. - task mon_ac ( - output ace_ac_beat_t beat - ); - cycle_start(); - while (!(snoop.ac_valid && snoop.ac_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.ac_addr = snoop.ac_addr; - beat.ac_snoop = snoop.ac_snoop; - beat.ac_prot = snoop.ac_prot; - cycle_end(); - endtask - - /// Monitor the CR channel and return the next beat. 
- task mon_cr ( - output ace_cr_beat_t beat - ); - cycle_start(); - while (!(snoop.cr_valid && snoop.cr_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.cr_resp = snoop.cr_resp; - cycle_end(); - endtask - - /// Monitor the CD channel and return the next beat. - task mon_cd ( - output ace_cd_beat_t beat - ); - cycle_start(); - while (!(snoop.cd_valid && snoop.cd_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.cd_data = snoop.cd_data; - beat.cd_last = snoop.cd_last; - cycle_end(); - endtask - - endclass - - class snoop_rand_master #( - // AXI interface parameters - parameter int AW = 32, - parameter int DW = 32, - // Stimuli application and test time - parameter time TA = 0ps, - parameter time TT = 0ps, - // Upper and lower bounds on wait cycles on AC, CR, and CD channels - parameter int AC_MIN_WAIT_CYCLES = 0, - parameter int AC_MAX_WAIT_CYCLES = 100, - parameter int CR_MIN_WAIT_CYCLES = 0, - parameter int CR_MAX_WAIT_CYCLES = 5, - parameter int CD_MIN_WAIT_CYCLES = 0, - parameter int CD_MAX_WAIT_CYCLES = 20 - ); - typedef snoop_test::snoop_driver #( - .AW(AW), .DW(DW), .TA(TA), .TT(TT) - ) snoop_driver_t; - typedef logic [AW-1:0] addr_t; - typedef logic [DW-1:0] data_t; - typedef ace_pkg::acsnoop_t acsnoop_t; - typedef ace_pkg::acprot_t acprot_t; - typedef ace_pkg::crresp_t crresp_t; - - typedef snoop_driver_t::ace_ac_beat_t ace_ac_beat_t; - typedef snoop_driver_t::ace_cr_beat_t ace_cr_beat_t; - typedef snoop_driver_t::ace_cd_beat_t ace_cd_beat_t; - - snoop_driver_t drv; - - typedef struct packed { - addr_t addr_begin; - addr_t addr_end; - mem_type_t mem_type; - } mem_region_t; - mem_region_t mem_map[$]; - - function new( - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH(AW), - .SNOOP_DATA_WIDTH(DW) - ) snoop - ); - this.drv = new(snoop); - this.reset(); - endfunction - - function void reset(); - drv.reset_master(); - endfunction - - function void add_memory_region(input addr_t addr_begin, input addr_t addr_end, input mem_type_t 
mem_type); - mem_map.push_back({addr_begin, addr_end, mem_type}); - endfunction - - function ace_ac_beat_t new_rand_burst(); - automatic logic rand_success; - automatic ace_ac_beat_t ace_ac_beat = new; - automatic addr_t addr; - automatic ace_pkg::acsnoop_t snoop; - automatic ace_pkg::acprot_t prot; - automatic int unsigned mem_region_idx; - automatic mem_region_t mem_region; - automatic ac_snoop_e trs; - - // No memory regions defined - if (mem_map.size() == 0) begin - // Return a dummy region - mem_region = '{ - addr_begin: '0, - addr_end: '1, - mem_type: axi_pkg::NORMAL_NONCACHEABLE_BUFFERABLE - }; - end else begin - // Randomly pick a memory region - mem_region_idx = $urandom_range(0,mem_map.size()-1); - // std::randomize(mem_region_idx) with { - // mem_region_idx < mem_map.size(); - // }; assert(rand_success); - mem_region = mem_map[mem_region_idx]; - end - - // Randomize address - addr = mem_region.addr_begin + $urandom_range(mem_region.addr_end-mem_region.addr_begin+1); - - ace_ac_beat.ac_addr = addr; - - std::randomize(trs) with - {!(trs inside {AC_DVM_MESSAGE, AC_DVM_COMPLETE});}; // DVM not supported for the moment - - case (trs) - AC_READ_ONCE : snoop = ace_pkg::ReadOnce; - AC_READ_SHARED : snoop = ace_pkg::ReadShared; - AC_READ_CLEAN : snoop = ace_pkg::ReadClean; - AC_READ_NOT_SHARED_DIRTY: snoop = ace_pkg::ReadNotSharedDirty; - AC_READ_UNIQUE : snoop = ace_pkg::ReadUnique; - AC_CLEAN_SHARED : snoop = ace_pkg::CleanShared; - AC_CLEAN_INVALID : snoop = ace_pkg::CleanInvalid; - AC_MAKE_INVALID : snoop = ace_pkg::MakeInvalid; - AC_DVM_COMPLETE : snoop = ace_pkg::DVMComplete; - AC_DVM_MESSAGE : snoop = ace_pkg::DVMMessage; - endcase - - prot = $urandom(); - - // rand_success = std::randomize(id); assert(rand_success); - // rand_success = std::randomize(qos); assert(rand_success); - // The random ID *must* be legalized with `legalize_id()` before the beat is sent! This is - // currently done in the functions `create_aws()` and `send_ars()`. 
- ace_ac_beat.ac_snoop = snoop; - ace_ac_beat.ac_prot = prot; - - return ace_ac_beat; - endfunction - - // TODO: The `rand_wait` task exists in `rand_verif_pkg`, but that task cannot be called with - // `this.drv.ace.clk_i` as `clk` argument. What is the syntax for getting an assignable - // reference? - task automatic rand_wait(input int unsigned min, max); - int unsigned rand_success, cycles; - cycles = $urandom_range(min,max); - // rand_success = std::randomize(cycles) with { - // cycles >= min; - // cycles <= max; - // }; - //assert (rand_success) else $error("Failed to randomize wait cycles!"); - repeat (cycles) @(posedge this.drv.snoop.clk_i); - endtask - - task send_acs(input int n_reads); - automatic logic rand_success; - repeat (n_reads) begin - automatic ace_ac_beat_t ace_ac_beat = new_rand_burst(); - rand_wait(AC_MIN_WAIT_CYCLES, AC_MAX_WAIT_CYCLES); - drv.send_ac(ace_ac_beat); - end - endtask - - task recv_crs(ref logic ac_done); - while (!ac_done) begin - automatic ace_cr_beat_t ace_cr_beat; - automatic ace_cd_beat_t ace_cd_beat; - rand_wait(CR_MIN_WAIT_CYCLES, CR_MAX_WAIT_CYCLES); - drv.recv_cr(ace_cr_beat); - if (!ace_cr_beat.cr_resp.Error & ace_cr_beat.cr_resp.DataTransfer) - drv.recv_cd(ace_cd_beat); - end - endtask - - task recv_cds(ref logic ac_done); - while (!ac_done) begin - automatic ace_cd_beat_t ace_cd_beat; - rand_wait(CD_MIN_WAIT_CYCLES, CD_MAX_WAIT_CYCLES); - drv.recv_cd(ace_cd_beat); - end - endtask - - // Issue n_reads random read transactions to an address range - task run(input int n_reads); - automatic logic ac_done = 1'b0; - fork - begin - send_acs(n_reads); - ac_done = 1'b1; - end - recv_crs(ac_done); - join - endtask - - endclass - - class snoop_rand_slave #( - // AXI interface parameters - parameter int AW = 32, - parameter int DW = 32, - // Stimuli application and test time - parameter time TA = 0ps, - parameter time TT = 0ps, - parameter bit RAND_RESP = 0, - // Upper and lower bounds on wait cycles on Ax, W, and resp (R and B) 
channels - parameter int AC_MIN_WAIT_CYCLES = 0, - parameter int AC_MAX_WAIT_CYCLES = 100, - parameter int CR_MIN_WAIT_CYCLES = 0, - parameter int CR_MAX_WAIT_CYCLES = 5, - parameter int CD_MIN_WAIT_CYCLES = 0, - parameter int CD_MAX_WAIT_CYCLES = 20 - ); - typedef snoop_test::snoop_driver #( - .AW(AW), .DW(DW), .TA(TA), .TT(TT) - ) snoop_driver_t; - typedef snoop_driver_t::ace_ac_beat_t ace_ac_beat_t; - typedef snoop_driver_t::ace_cr_beat_t ace_cr_beat_t; - typedef snoop_driver_t::ace_cd_beat_t ace_cd_beat_t; - - typedef logic [AW-1:0] addr_t; - - snoop_driver_t drv; - ace_ac_beat_t ace_ac_queue[$]; - int unsigned cd_wait_cnt; - - function new( - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH(AW), - .SNOOP_DATA_WIDTH(DW) - ) snoop - ); - this.drv = new(snoop); - this.cd_wait_cnt = 0; - this.reset(); - endfunction - - function void reset(); - this.drv.reset_slave(); - endfunction - - // TODO: The `rand_wait` task exists in `rand_verif_pkg`, but that task cannot be called with - // `this.drv.ace.clk_i` as `clk` argument. What is the syntax getting an assignable reference? 
- task automatic rand_wait(input int unsigned min, max); - int unsigned rand_success, cycles; - cycles = $urandom_range(min,max); - // rand_success = std::randomize(cycles) with { - // cycles >= min; - // cycles <= max; - // }; - // assert (rand_success) else $error("Failed to randomize wait cycles!"); - repeat (cycles) @(posedge this.drv.snoop.clk_i); - endtask - - task recv_acs(); - forever begin - automatic ace_ac_beat_t ace_ac_beat; - rand_wait(AC_MIN_WAIT_CYCLES, AC_MAX_WAIT_CYCLES); - drv.recv_ac(ace_ac_beat); - ace_ac_queue.push_back(ace_ac_beat); - end - endtask - - task send_crs(); - forever begin - automatic logic rand_success; - automatic ace_ac_beat_t ace_ac_beat; - automatic ace_cr_beat_t ace_cr_beat = new; - wait (ace_ac_queue.size() > 0); - ace_ac_beat = ace_ac_queue.pop_front(); - if(ace_ac_beat.ac_snoop == ace_pkg::CleanInvalid) begin - ace_cr_beat.cr_resp = 0; - end else begin - ace_cr_beat.cr_resp[4:2] = $urandom_range(0,3'b111);//$urandom_range(0,5'b11111); - ace_cr_beat.cr_resp[1] = 1'b0; - ace_cr_beat.cr_resp[0] = $urandom_range(0,1); - end - rand_wait(CR_MIN_WAIT_CYCLES, CR_MAX_WAIT_CYCLES); - if (ace_cr_beat.cr_resp.DataTransfer) begin - cd_wait_cnt++; - end - drv.send_cr(ace_cr_beat); - end - endtask - - task send_cds(); - forever begin - automatic logic rand_success; - automatic ace_ac_beat_t ace_ac_beat; - automatic ace_cd_beat_t ace_cd_beat = new; - automatic addr_t byte_addr; - wait (cd_wait_cnt > 0); - // random response - ace_cd_beat.cd_data = $urandom(); - ace_cd_beat.cd_last = 1'b0; - rand_wait(CD_MIN_WAIT_CYCLES, CD_MAX_WAIT_CYCLES); - drv.send_cd(ace_cd_beat); - ace_cd_beat.cd_data = $urandom(); - ace_cd_beat.cd_last = 1'b1; - rand_wait(CD_MIN_WAIT_CYCLES, CD_MAX_WAIT_CYCLES); - drv.send_cd(ace_cd_beat); - cd_wait_cnt--; - end - endtask - - task run(); - fork - recv_acs(); - send_crs(); - send_cds(); - join - endtask - - endclass - - /// Snoop Monitor. 
- class snoop_monitor #( - parameter AW = 32, - parameter DW = 32, - parameter time TA = 0ns , // stimuli application time - parameter time TT = 0ns // stimuli test time - ); - - typedef snoop_test::snoop_driver #( - .AW(AW), .DW(DW), .TA(TA), .TT(TT) - ) snoop_driver_t; - - typedef snoop_driver_t::ace_ac_beat_t ace_ac_beat_t; - typedef snoop_driver_t::ace_cd_beat_t ace_cd_beat_t; - typedef snoop_driver_t::ace_cr_beat_t ace_cr_beat_t; - - snoop_driver_t drv; - mailbox ac_mbx = new, cd_mbx = new, cr_mbx = new; - - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH(AW), - .SNOOP_DATA_WIDTH(DW) - ) snoop; - - function new( - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH(AW), - .SNOOP_DATA_WIDTH(DW) - ) snoop - ); - this.drv = new(snoop); - endfunction - - task monitor; - fork - // AC - forever begin - automatic ace_ac_beat_t beat; - this.drv.mon_ac(beat); - ac_mbx.put(beat); - end - // CR - forever begin - automatic ace_cr_beat_t beat; - this.drv.mon_cr(beat); - cr_mbx.put(beat); - end - // CD - forever begin - automatic ace_cd_beat_t beat; - this.drv.mon_cd(beat); - cd_mbx.put(beat); - end - join - endtask - endclass - -endpackage - - -// non synthesisable ace snoop logger module -// this module logs the activity of the input snoop channel -// the log files will be found in "./ace_log//" -// one log file for all writes -// a log file per id for the reads -// atomic transactions with read response are injected into the corresponding log file of the read -module snoop_chan_logger #( - parameter time TestTime = 8ns, // Time after clock, where sampling happens - parameter string LoggerName = "snoop_logger", // name of the logger - parameter type ac_chan_t = logic, // ACE AC type - parameter type cr_chan_t = logic, // ACE CR type - parameter type cd_chan_t = logic // ACE CD type -) ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low, when `1'b0` no sampling - input logic end_sim_i, // end of simulation - // AC channel - input ac_chan_t ac_chan_i, 
- input logic ac_valid_i, - input logic ac_ready_i, - // CR channel - input cr_chan_t cr_chan_i, - input logic cr_valid_i, - input logic cr_ready_i, - // CD channel - input cd_chan_t cd_chan_i, - input logic cd_valid_i, - input logic cd_ready_i -); - - // queues for writes and reads - ac_chan_t ac_queues[$]; - cr_chan_t cr_queues[$]; - cd_chan_t cd_queues[$]; - - // channel sampling into queues - always @(posedge clk_i) #TestTime begin : proc_channel_sample - automatic ac_chan_t ac_beat; - automatic int fd; - automatic string log_file; - automatic string log_str; - // only execute when reset is high - if (rst_ni) begin - // AC channel - if (ac_valid_i && ac_ready_i) begin - log_file = $sformatf("./ace_log/%s/snoop_read.log", LoggerName); - fd = $fopen(log_file, "a"); - if (fd) begin - log_str = $sformatf("%0t> AC, ADDR: 0x%h SNOOP %b, PROT %b", $time, ac_chan_i.addr, ac_chan_i.snoop, ac_chan_i.prot); - $fdisplay(fd, log_str); - $fclose(fd); - end - ac_beat.addr = ac_chan_i.addr; - ac_beat.snoop = ac_chan_i.snoop; - ac_beat.prot = ac_chan_i.prot; - ac_queues.push_back(ac_beat); - end - // CR channel - if (cr_valid_i && cr_ready_i) begin - cr_queues.push_back(cr_chan_i); - end - // CD channel - if (cd_valid_i && cd_ready_i) begin - cd_queues.push_back(cd_chan_i); - end - end - end - - initial begin : proc_log - automatic string log_name; - automatic string log_string; - automatic ac_chan_t ac_beat; - automatic cr_chan_t cr_beat; - automatic cd_chan_t cd_beat; - automatic int unsigned no_r_beat; - automatic int fd; - - no_r_beat = 0; - - // make the log dirs - log_name = $sformatf("mkdir -p ./ace_log/%s/", LoggerName); - $system(log_name); - - // open log files - log_name = $sformatf("./ace_log/%s/snoop_read.log", LoggerName); - fd = $fopen(log_name, "w"); - if (fd) begin - $display("File was opened successfully : %s", log_name); - $fclose(fd); - end else - $display("File was NOT opened successfully : %s", log_name); - - // on each clock cycle update the logs if there 
is something in the queues - wait (rst_ni); - while (!end_sim_i) begin - @(posedge clk_i); - - // update the read log files - while (ac_queues.size() != 0 && cr_queues.size() != 0) begin - ac_beat = ac_queues.pop_front(); - cr_beat = cr_queues.pop_front(); - log_name = $sformatf("./ace_log/%s/snoop_read.log", LoggerName); - fd = $fopen(log_name, "a"); - if (fd) begin - log_string = $sformatf("%0t ns> CR %d RESP: %b, ", - $time, no_r_beat, cr_beat); - $fdisplay(fd, log_string); - if (cr_beat.DataTransfer && !cr_beat.Error) begin - while(cd_queues.size() != 0) begin - cd_beat = cd_queues.pop_front(); - log_string = $sformatf("%0t ns> CD %d DATA: %h, ", - $time, no_r_beat, cd_beat.data); - $fdisplay(fd, log_string); - end - end - $fclose(fd); - end - no_r_beat++; - end - end - $fclose(fd); - end -endmodule From 123df29bbbd4079fbf4860dd925c85d05e9a8a9a Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 12 Dec 2025 12:46:04 +0100 Subject: [PATCH 074/109] ccu_pkg: fix `axiDataSize` computation --- src/ccu/ccu_pkg.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu/ccu_pkg.sv b/src/ccu/ccu_pkg.sv index 04f635f..64a5eb0 100644 --- a/src/ccu/ccu_pkg.sv +++ b/src/ccu/ccu_pkg.sv @@ -74,7 +74,7 @@ package ccu_pkg; p.writeTransactionIndexWidth = u.numWriteTransactions > 1 ? $clog2(u.numWriteTransactions) : 1; p.cachelineAxiTransfers = u.cachelineWidth / u.axiDataWidth; p.transactionIndexWidth = u.numShareableTransactions > 1 ? $clog2(u.numShareableTransactions) : 1; - p.axiDataSize = (u.axiDataWidth > 8 ? $clog2(u.axiDataWidth / 8) : 1) - 1; + p.axiDataSize = u.axiDataWidth > 8 ? 
$clog2(u.axiDataWidth / 8) : 1; p.addressCheckWidth = u.addressCheckMsb - u.addressCheckLsb + 1; return p; From f641e757c1aea90f923777288ad7e327e10e7409 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 12 Dec 2025 12:46:44 +0100 Subject: [PATCH 075/109] ccu_snoop_pipeline: fix AW address computation --- src/ccu/ccu_snoop_pipeline.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index dde435e..9d15995 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -397,7 +397,7 @@ module ccu_snoop_pipeline // writeback operations // TODO: this might be overkill? write_engine_aw_o.id = {1'b1, stage1_fifo_rdata.ar.id}; - write_engine_aw_o.addr = axi_pkg::aligned_addr(stage1_fifo_rdata.ar, ccuCfg.cachelineByteIndexWidth); + write_engine_aw_o.addr = axi_pkg::aligned_addr(stage1_fifo_rdata.ar.addr, ccuCfg.cachelineByteIndexWidth); write_engine_aw_o.len = ccuCfg.cachelineAxiTransfers - 1; write_engine_aw_o.size = ccuCfg.axiDataSize; write_engine_aw_o.burst = axi_pkg::BURST_WRAP; From ada265b171bd63ead9d4bc780802bff8b2f8b210 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 12 Dec 2025 12:47:24 +0100 Subject: [PATCH 076/109] ccu_top: fix packed array declaration --- src/ccu/ccu_top.sv | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ccu/ccu_top.sv b/src/ccu/ccu_top.sv index aa550ea..806f619 100644 --- a/src/ccu/ccu_top.sv +++ b/src/ccu/ccu_top.sv @@ -71,16 +71,16 @@ module ccu_top localparam int unsigned scoreboardEntryIndexWidth = ccuCfg.transactionIndexWidth; -logic scoreboard_full; -logic scoreboard_alloc_check; -logic scoreboard_alloc; -logic scoreboard_alloc_hit; -logic scoreboard_dealloc_check; -logic [ccuCfg.axiCcuIdWidth-1:0] scoreboard_dealloc_id; -logic scoreboard_dealloc_hit; -logic [scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_hit_entry; -logic [ccuCfg.u.numSubordinates] scoreboard_dealloc; 
-logic [ccuCfg.u.numSubordinates][scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_entry; +logic scoreboard_full; +logic scoreboard_alloc_check; +logic scoreboard_alloc; +logic scoreboard_alloc_hit; +logic scoreboard_dealloc_check; +logic [ccuCfg.axiCcuIdWidth-1:0] scoreboard_dealloc_id; +logic scoreboard_dealloc_hit; +logic [scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_hit_entry; +logic [ccuCfg.u.numSubordinates-1:0] scoreboard_dealloc; +logic [ccuCfg.u.numSubordinates-1:0][scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_entry; logic replay_alloc; logic replay_full; From 10d0b14d7d4d315eb2071eb601d8a7de866ab5d2 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 12 Dec 2025 12:47:51 +0100 Subject: [PATCH 077/109] ccu_write_engine: fix signal name typo --- src/ccu/ccu_write_engine.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ccu/ccu_write_engine.sv b/src/ccu/ccu_write_engine.sv index 4f30b28..0bc2e69 100644 --- a/src/ccu/ccu_write_engine.sv +++ b/src/ccu/ccu_write_engine.sv @@ -160,7 +160,7 @@ module ccu_write_engine .data_i (aw_is_writeback), .valid_i (w_ctrl_fifo_valid_in), .ready_o (w_ctrl_fifo_ready_in), - .data_o (w_is_write_back), + .data_o (w_is_writeback), .valid_o (w_ctrl_fifo_valid_out), .ready_i (w_ctrl_fifo_ready_out && w_o.last) ); @@ -172,7 +172,7 @@ module ccu_write_engine .inp_data_i ({writeback_w_i , w_i}), .inp_valid_i({writeback_w_valid_i, w_valid_i}), .inp_ready_o({writeback_w_ready_o, w_ready_o}), - .inp_sel_i (w_is_write_back), + .inp_sel_i (w_is_writeback), .oup_data_o (w_o), .oup_valid_o(w_mux_valid_out), .oup_ready_i(w_mux_ready_out) From 825020567d4e3f436e0b91a1266bf9bffdba2f24 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 12 Dec 2025 12:48:14 +0100 Subject: [PATCH 078/109] ccu: remove shareable stall logic --- src/ccu/ccu_frontend.sv | 14 -------------- src/ccu/ccu_top.sv | 4 ---- 2 files changed, 18 deletions(-) diff --git a/src/ccu/ccu_frontend.sv b/src/ccu/ccu_frontend.sv 
index 757bd02..d8a7170 100644 --- a/src/ccu/ccu_frontend.sv +++ b/src/ccu/ccu_frontend.sv @@ -38,8 +38,6 @@ module ccu_frontend input logic clk_i, input logic rst_ni, - input logic shareable_stall_i, - input ccu_ace_subordinate_req_t [ccuCfg.u.numSubordinates-1:0] subordinate_req_i, output ccu_ace_subordinate_resp_t [ccuCfg.u.numSubordinates-1:0] subordinate_resp_o, input logic [ccuCfg.u.numSubordinates-1:0] subordinate_rack_i, @@ -88,19 +86,12 @@ module ccu_frontend // {{{ for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_subordinate_monitor - logic ar_is_shareable; logic rack_fifo_full; rack_fifo_entry_t rack_fifo_wdata; rack_fifo_entry_t rack_fifo_rdata; logic rack_fifo_push; logic rack_fifo_pop; - assign ar_is_shareable = ace_ar_is_shareable( - subordinate_req_i[s].ar.bar[0], - subordinate_req_i[s].ar.domain, - subordinate_req_i[s].ar.snoop - ); - always_comb begin : ar_comb // Input request --> exclusive monitor `ACE_SET_AR_STRUCT(subordinate_ar[s], subordinate_req_i[s].ar) @@ -111,11 +102,6 @@ module ccu_frontend `ACE_SET_AR_STRUCT(subordinate_req[s].ar, exclusive_monitor_ar[s]) subordinate_req[s].ar_valid = exclusive_monitor_ar_valid[s]; exclusive_monitor_ar_ready[s] = subordinate_resp[s].ar_ready; - - if (ar_is_shareable && shareable_stall_i) begin - subordinate_req[s].ar_valid = 1'b0; - exclusive_monitor_ar_ready[s] = 1'b0; - end end always_comb begin : r_comb diff --git a/src/ccu/ccu_top.sv b/src/ccu/ccu_top.sv index 806f619..ff2605b 100644 --- a/src/ccu/ccu_top.sv +++ b/src/ccu/ccu_top.sv @@ -121,9 +121,6 @@ ccu_axi_manager_resp_t manager_cut_resp; // The frontend acts as the Point of Serialization (PoS) ccu_ace_req_t frontend_req; ccu_ace_resp_t frontend_resp; - logic shareable_stall; - - assign shareable_stall = 1'b0; ccu_frontend #( .ccuCfg (ccuCfg), @@ -143,7 +140,6 @@ ccu_axi_manager_resp_t manager_cut_resp; ) u_ccu_frontend ( .clk_i, .rst_ni, - .shareable_stall_i (shareable_stall), .subordinate_req_i (subordinate_req_i), 
.subordinate_resp_o (subordinate_resp_o), .subordinate_rack_i (subordinate_rack_i), From abe0cc6b6b98c17ceb1fbde99dab670a981855d4 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 12 Dec 2025 15:54:22 +0100 Subject: [PATCH 079/109] ccu_pkg: remove leftover comment --- src/ccu/ccu_pkg.sv | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/ccu/ccu_pkg.sv b/src/ccu/ccu_pkg.sv index 64a5eb0..9ffe9fd 100644 --- a/src/ccu/ccu_pkg.sv +++ b/src/ccu/ccu_pkg.sv @@ -27,8 +27,6 @@ package ccu_pkg; int unsigned axiSubordinateIdWidth; // Cache parameters int unsigned cachelineWidth; - // LR/SC reservation buffer parameters - /* TODO */ // LSB address bit used for hazard checks (inclusive) int unsigned addressCheckLsb; // MSB address bit used for hazard checks (inclusive) From 3dac804832a4c79331f4524a56173159039ca44b Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 12 Dec 2025 15:55:12 +0100 Subject: [PATCH 080/109] ccu_snoop_pipeline: use `CACHE_BUFFERABLE` from `axi_pkg` --- src/ccu/ccu_snoop_pipeline.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index 9d15995..81b7fbe 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -404,7 +404,7 @@ module ccu_snoop_pipeline write_engine_aw_o.lock = 1'b0; // Enforce non-bufferable requirements // This should fix premature B responses - write_engine_aw_o.cache = stage1_fifo_rdata.ar.cache & ~{axi_pkg::CacheWidth{1'b1}}; + write_engine_aw_o.cache = stage1_fifo_rdata.ar.cache & ~axi_pkg::CACHE_BUFFERABLE; write_engine_aw_o.prot = stage1_fifo_rdata.ar.prot; write_engine_aw_o.qos = stage1_fifo_rdata.ar.qos; write_engine_aw_o.region = stage1_fifo_rdata.ar.region; From 083073e9cdd07773adf3a7012397fd707c2edef5 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 12 Dec 2025 15:55:40 +0100 Subject: [PATCH 081/109] ccu_read_engine: add decoupling fallthrough AR FIFO --- src/ccu/ccu_read_engine.sv | 38 
+++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/ccu/ccu_read_engine.sv b/src/ccu/ccu_read_engine.sv index 2584d9b..478a692 100644 --- a/src/ccu/ccu_read_engine.sv +++ b/src/ccu/ccu_read_engine.sv @@ -50,12 +50,40 @@ module ccu_read_engine // AR channel // {{{ - assign ar_addr_check_o = ar_valid_i; - assign ar_addr_slice_o = ar_i.addr[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb]; - assign ar_valid_o = !ar_addr_hit_i && ar_valid_i; - assign ar_ready_o = !ar_addr_hit_i && ar_ready_i; + logic ar_fifo_valid; + logic ar_fifo_ready; + ccu_axi_ar_t ar_fifo_wdata; + ccu_axi_ar_t ar_fifo_rdata; - `AXI_ASSIGN_AR_STRUCT(ar_o, ar_i) + `AXI_ASSIGN_AR_STRUCT(ar_fifo_wdata, ar_i) + + // Fallthrough FIFO inserted to decouple + // snoop pipeline requests from the read + // engine when address hazards happen + // TODO: is one entry enough? + stream_fifo #( + .FALL_THROUGH (1'b1), + .DEPTH (1), + .T (ccu_axi_ar_t) + ) u_ar_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i (1'b0), + .usage_o (), + .data_i (ar_fifo_wdata), + .valid_i (ar_valid_i), + .ready_o (ar_ready_o), + .data_o (ar_fifo_rdata), + .valid_o (ar_fifo_valid), + .ready_i (ar_fifo_ready) + ); + + assign ar_addr_check_o = ar_fifo_valid; + assign ar_addr_slice_o = ar_fifo_rdata.addr[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb]; + assign ar_valid_o = !ar_addr_hit_i && ar_fifo_valid; + assign ar_fifo_ready = !ar_addr_hit_i && ar_ready_i; + `AXI_ASSIGN_AR_STRUCT(ar_o, ar_fifo_rdata) // }}} // R channel From 03700654c615b2c6925346a2ece4758ddb2b238c Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 15 Dec 2025 10:38:19 +0100 Subject: [PATCH 082/109] ccu_snoop_pipeline: add stub for performance events --- src/ccu/ccu_pkg.sv | 13 +++++++++++++ src/ccu/ccu_snoop_pipeline.sv | 19 ++++++++++++++++++- src/ccu/ccu_top.sv | 3 ++- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/ccu/ccu_pkg.sv b/src/ccu/ccu_pkg.sv index 
9ffe9fd..9998ec3 100644 --- a/src/ccu/ccu_pkg.sv +++ b/src/ccu/ccu_pkg.sv @@ -78,4 +78,17 @@ package ccu_pkg; return p; endfunction + // Performance events + typedef struct packed { + logic stage0_stall; + logic stage0_stall_scoreboard_hit; + logic stage0_stall_ac_fifo_full; + logic stage0_stall_stage1_fifo_full; + logic stage1_stall; + logic stage1_stall_cr_not_valid; + logic stage1_stall_write_engine_busy; + logic stage1_stall_read_engine_busy; + logic stage1_stall_cd_engine_busy; + } ccu_snoop_pipeline_events_t; + endpackage diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index 81b7fbe..401fa88 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -67,7 +67,9 @@ module ccu_snoop_pipeline output ccu_axi_ar_t read_engine_ar_o, output logic read_engine_r_valid_o, input logic read_engine_r_ready_i, - output ccu_ace_r_t read_engine_r_o + output ccu_ace_r_t read_engine_r_o, + + output ccu_snoop_pipeline_events_t events_o ); // AC channel @@ -605,4 +607,19 @@ module ccu_snoop_pipeline user: cd_engine_fifo_rdata.ar_user }; // }}} + +// Performance events +// {{{ +always_comb begin : perf_events_comb + events_o.stage0_stall = ar_valid_i && !ar_ready_o; + events_o.stage0_stall_scoreboard_hit = scoreboard_alloc_hit_i; + events_o.stage0_stall_ac_fifo_full = ac_valid && !ac_ready; + events_o.stage0_stall_stage1_fifo_full = stage0_valid && !stage0_ready; + events_o.stage1_stall = stage1_fifo_valid && !stage1_fifo_ready; + events_o.stage1_stall_cr_not_valid = stage1_fifo_valid && |(~cr_fifo_valid & stage1_fifo_rdata.sel); + events_o.stage1_stall_write_engine_busy = write_engine_aw_valid_o && !write_engine_aw_ready_i; + events_o.stage1_stall_read_engine_busy = read_engine_ar_valid_o && !read_engine_ar_ready_i; + events_o.stage1_stall_cd_engine_busy = cd_engine_valid && !cd_engine_ready; +end +// }}} endmodule diff --git a/src/ccu/ccu_top.sv b/src/ccu/ccu_top.sv index ff2605b..4af35e2 100644 --- a/src/ccu/ccu_top.sv +++ 
b/src/ccu/ccu_top.sv @@ -201,7 +201,8 @@ ccu_axi_manager_resp_t manager_cut_resp; .read_engine_ar_o (snoop_read_engine_ar), .read_engine_r_valid_o (snoop_read_engine_r_valid), .read_engine_r_ready_i (snoop_read_engine_r_ready), - .read_engine_r_o (snoop_read_engine_r) + .read_engine_r_o (snoop_read_engine_r), + .events_o (/* unused */) ); for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_snoop_assignments From cff670e4b6573f044a0eb81a23ab2f68b165e98e Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 15 Dec 2025 15:57:20 +0100 Subject: [PATCH 083/109] ccu: add authorship comments --- src/ccu/ccu_exclusive_monitor.sv | 3 +++ src/ccu/ccu_frontend.sv | 3 +++ src/ccu/ccu_pkg.sv | 3 +++ src/ccu/ccu_read_engine.sv | 3 +++ src/ccu/ccu_scoreboard.sv | 3 +++ src/ccu/ccu_snoop_pipeline.sv | 3 +++ src/ccu/ccu_top.sv | 3 +++ src/ccu/ccu_write_engine.sv | 3 +++ 8 files changed, 24 insertions(+) diff --git a/src/ccu/ccu_exclusive_monitor.sv b/src/ccu/ccu_exclusive_monitor.sv index f5b1dc7..a46fd3f 100644 --- a/src/ccu/ccu_exclusive_monitor.sv +++ b/src/ccu/ccu_exclusive_monitor.sv @@ -8,6 +8,9 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +// +// Authors: +// - Riccardo Tedeschi `include "axi/assign.svh" `include "ace/assign.svh" diff --git a/src/ccu/ccu_frontend.sv b/src/ccu/ccu_frontend.sv index d8a7170..f72f6ae 100644 --- a/src/ccu/ccu_frontend.sv +++ b/src/ccu/ccu_frontend.sv @@ -8,6 +8,9 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. 
+// +// Authors: +// - Riccardo Tedeschi `include "ace/assign.svh" diff --git a/src/ccu/ccu_pkg.sv b/src/ccu/ccu_pkg.sv index 9998ec3..993adb3 100644 --- a/src/ccu/ccu_pkg.sv +++ b/src/ccu/ccu_pkg.sv @@ -8,6 +8,9 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +// +// Authors: +// - Riccardo Tedeschi package ccu_pkg; diff --git a/src/ccu/ccu_read_engine.sv b/src/ccu/ccu_read_engine.sv index 478a692..bcf3409 100644 --- a/src/ccu/ccu_read_engine.sv +++ b/src/ccu/ccu_read_engine.sv @@ -8,6 +8,9 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +// +// Authors: +// - Riccardo Tedeschi `include "axi/assign.svh" `include "ace/convert.svh" diff --git a/src/ccu/ccu_scoreboard.sv b/src/ccu/ccu_scoreboard.sv index aed6e85..8449e06 100644 --- a/src/ccu/ccu_scoreboard.sv +++ b/src/ccu/ccu_scoreboard.sv @@ -8,6 +8,9 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +// +// Authors: +// - Riccardo Tedeschi module ccu_scoreboard import ace_pkg::*; diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index 401fa88..b5d7f2a 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -8,6 +8,9 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. 
+// +// Authors: +// - Riccardo Tedeschi `include "axi/assign.svh" diff --git a/src/ccu/ccu_top.sv b/src/ccu/ccu_top.sv index 4af35e2..f39a20f 100644 --- a/src/ccu/ccu_top.sv +++ b/src/ccu/ccu_top.sv @@ -8,6 +8,9 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +// +// Authors: +// - Riccardo Tedeschi `include "axi/typedef.svh" `include "ace/typedef.svh" diff --git a/src/ccu/ccu_write_engine.sv b/src/ccu/ccu_write_engine.sv index 0bc2e69..3e773a3 100644 --- a/src/ccu/ccu_write_engine.sv +++ b/src/ccu/ccu_write_engine.sv @@ -8,6 +8,9 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +// +// Authors: +// - Riccardo Tedeschi `include "axi/assign.svh" From b808a6a8422c881352e0eb517ba190a9cc30f296 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 17 Dec 2025 19:20:45 +0100 Subject: [PATCH 084/109] ccu_snoop_pipeline: add register for performance events --- src/ccu/ccu_snoop_pipeline.sv | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index b5d7f2a..bf18919 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -613,16 +613,23 @@ module ccu_snoop_pipeline // Performance events // {{{ -always_comb begin : perf_events_comb - events_o.stage0_stall = ar_valid_i && !ar_ready_o; - events_o.stage0_stall_scoreboard_hit = scoreboard_alloc_hit_i; - events_o.stage0_stall_ac_fifo_full = ac_valid && !ac_ready; - events_o.stage0_stall_stage1_fifo_full = stage0_valid && !stage0_ready; - events_o.stage1_stall = stage1_fifo_valid && !stage1_fifo_ready; - 
events_o.stage1_stall_cr_not_valid = stage1_fifo_valid && |(~cr_fifo_valid & stage1_fifo_rdata.sel); - events_o.stage1_stall_write_engine_busy = write_engine_aw_valid_o && !write_engine_aw_ready_i; - events_o.stage1_stall_read_engine_busy = read_engine_ar_valid_o && !read_engine_ar_ready_i; - events_o.stage1_stall_cd_engine_busy = cd_engine_valid && !cd_engine_ready; -end + ccu_snoop_pipeline_events_t events_d; + + always_comb begin : perf_events_comb + events_d.stage0_stall = ar_valid_i && !ar_ready_o; + events_d.stage0_stall_scoreboard_hit = scoreboard_alloc_hit_i; + events_d.stage0_stall_ac_fifo_full = ac_valid && !ac_ready; + events_d.stage0_stall_stage1_fifo_full = stage0_valid && !stage0_ready; + events_d.stage1_stall = stage1_fifo_valid && !stage1_fifo_ready; + events_d.stage1_stall_cr_not_valid = stage1_fifo_valid && |(~cr_fifo_valid & stage1_fifo_rdata.sel); + events_d.stage1_stall_write_engine_busy = write_engine_aw_valid_o && !write_engine_aw_ready_i; + events_d.stage1_stall_read_engine_busy = read_engine_ar_valid_o && !read_engine_ar_ready_i; + events_d.stage1_stall_cd_engine_busy = cd_engine_valid && !cd_engine_ready; + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) events_o <= '0; + else events_o <= events_d; + end // }}} endmodule From f12f1db7347a0702c92221c56c199d1e1127d12d Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 17 Dec 2025 19:22:25 +0100 Subject: [PATCH 085/109] ccu: add replay support --- src/ccu/ccu_pkg.sv | 7 ++ src/ccu/ccu_replay.sv | 185 ++++++++++++++++++++++++++++++++-- src/ccu/ccu_scoreboard.sv | 161 +++++++++++++++-------------- src/ccu/ccu_snoop_pipeline.sv | 10 +- src/ccu/ccu_top.sv | 109 +++++++++++++++----- 5 files changed, 361 insertions(+), 111 deletions(-) diff --git a/src/ccu/ccu_pkg.sv b/src/ccu/ccu_pkg.sv index 993adb3..617cc0e 100644 --- a/src/ccu/ccu_pkg.sv +++ b/src/ccu/ccu_pkg.sv @@ -23,6 +23,10 @@ package ccu_pkg; int unsigned numWriteTransactions; // Number of simultaneous 
inflight snoop transactions int unsigned numSnoopTransactions; + // Enable replay of conflicting requests + bit enableReplay; + // Number of replay list entries + int unsigned numReplayEntries; // AXI/ACE parameters int unsigned axiAddressWidth; int unsigned axiDataWidth; @@ -56,6 +60,8 @@ package ccu_pkg; int unsigned cachelineAxiTransfers; // Transaction index width int unsigned transactionIndexWidth; + // Replay entry index width + int unsigned replayEntryIndexWidth; // AXI data size int unsigned axiDataSize; // Address slice width used for hazard checks @@ -75,6 +81,7 @@ package ccu_pkg; p.writeTransactionIndexWidth = u.numWriteTransactions > 1 ? $clog2(u.numWriteTransactions) : 1; p.cachelineAxiTransfers = u.cachelineWidth / u.axiDataWidth; p.transactionIndexWidth = u.numShareableTransactions > 1 ? $clog2(u.numShareableTransactions) : 1; + p.replayEntryIndexWidth = u.numReplayEntries > 1 ? $clog2(u.numReplayEntries) : 1; p.axiDataSize = u.axiDataWidth > 8 ? $clog2(u.axiDataWidth / 8) : 1; p.addressCheckWidth = u.addressCheckMsb - u.addressCheckLsb + 1; diff --git a/src/ccu/ccu_replay.sv b/src/ccu/ccu_replay.sv index 0c62d7a..c535f4e 100644 --- a/src/ccu/ccu_replay.sv +++ b/src/ccu/ccu_replay.sv @@ -8,26 +8,189 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. 
+// +// Authors: +// - Riccardo Tedeschi module ccu_replay import ace_pkg::*; import ccu_pkg::*; #( - parameter ccu_config_t ccuCfg = '{default: '0} + parameter ccu_config_t ccuCfg = '{default: '0}, + parameter type ccu_ace_ar_t = logic, + localparam int unsigned numScoreboardEntries = ccuCfg.u.numShareableTransactions, + localparam int unsigned scoreboardEntryIndexWidth = ccuCfg.transactionIndexWidth ) ( - input logic clk_i, - input logic rst_ni, - - input logic replay_alloc_i, - output logic replay_full_o + input logic clk_i, + input logic rst_ni, + input logic alloc_i, + input ccu_ace_ar_t alloc_ar_i, + input logic [scoreboardEntryIndexWidth-1:0] alloc_scoreboard_entry_i, + input logic [scoreboardEntryIndexWidth-1:0] replay_scoreboard_entry_i, + output ccu_ace_ar_t replay_ar_o, + output logic replay_ar_valid_o, + input logic replay_ar_ready_i, + input logic [numScoreboardEntries-1:0] scoreboard_dealloc_i, + output logic full_o ); -/* -Stub implementation of replay -TODO: actual implementation -*/ +// Shared signals +// {{{ + typedef struct packed { + ccu_ace_ar_t ar; + logic [scoreboardEntryIndexWidth-1:0] dependency; + } replay_entry_t; + + typedef struct packed { + logic head; + logic tail; + logic [ccuCfg.replayEntryIndexWidth-1:0] next; + } replay_linked_list_t; + + replay_entry_t [ccuCfg.u.numReplayEntries-1:0] entry_q; + replay_entry_t [ccuCfg.u.numReplayEntries-1:0] entry_d; + replay_linked_list_t [ccuCfg.u.numReplayEntries-1:0] list_q; + replay_linked_list_t [ccuCfg.u.numReplayEntries-1:0] list_d; + logic [ccuCfg.u.numReplayEntries-1:0] valid_q; + logic [ccuCfg.u.numReplayEntries-1:0] valid_d; + logic [ccuCfg.u.numReplayEntries-1:0] hazard_q; + logic [ccuCfg.u.numReplayEntries-1:0] hazard_d; + + logic [ccuCfg.replayEntryIndexWidth-1:0] alloc_entry; + logic alloc_hazard; + logic alloc_head; + logic [ccuCfg.addressCheckWidth-1:0] alloc_addr_slice; + + logic [ccuCfg.u.numReplayEntries-1:0] address_hit; + logic [ccuCfg.u.numReplayEntries-1:0] replay_req; + 
logic [ccuCfg.u.numReplayEntries-1:0] replay_gnt; + ccu_ace_ar_t [ccuCfg.u.numReplayEntries-1:0] replay_ar; + logic replay_is_tail; + logic [ccuCfg.replayEntryIndexWidth-1:0] replay_entry; + logic [ccuCfg.replayEntryIndexWidth-1:0] replay_next_entry; + + assign full_o = &valid_q; + + always_comb begin : alloc_entry_comb + alloc_entry = '0; + for (int unsigned e = 0; e < ccuCfg.u.numReplayEntries; e++) begin + if (!valid_q[e]) begin + alloc_entry = e; + break; + end + end + end + + // Entries which are being allocated the same cycle the corresponding + // scoreboard entry is being deallocated can replay from the next cycle + assign alloc_hazard = !scoreboard_dealloc_i[alloc_scoreboard_entry_i]; + assign alloc_head = ~|(address_hit & valid_q); + assign alloc_addr_slice = alloc_ar_i.addr[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb]; +// }}} + +// Per-entry logic +// {{{ + for (genvar e = 0; e < ccuCfg.u.numReplayEntries; e++) begin : gen_entry + logic alloc; + logic clear_hazard; + logic link; + logic make_head; + logic [ccuCfg.addressCheckWidth-1:0] addr_slice; + + assign addr_slice = entry_q[e].ar.addr[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb]; + assign address_hit[e] = addr_slice == alloc_addr_slice; + assign replay_ar[e] = entry_q[e].ar; + + assign alloc = alloc_entry == e && alloc_i; + assign make_head = replay_next_entry == e && |replay_gnt && !replay_is_tail; + assign link = valid_q[e] && list_q[e].tail && address_hit[e] && alloc_i; + assign clear_hazard = valid_q[e] && list_q[e].head && scoreboard_dealloc_i[entry_q[e].dependency]; + assign replay_req[e] = valid_q[e] && list_q[e].head && !hazard_q[e]; + + always_comb begin : entry_comb + list_d [e] = list_q [e]; + entry_d [e] = entry_q [e]; + valid_d [e] = valid_q [e]; + hazard_d[e] = hazard_q[e]; + + unique case (1'b1) + // The scoreboard dependency for a first allocation + // is given by the scoreboard entry with a colliding + // address + alloc: begin + valid_d [e] = 1'b1; + entry_d [e].ar = 
alloc_ar_i; + entry_d [e].dependency = alloc_scoreboard_entry_i; + list_d [e].tail = 1'b1; + list_d [e].head = alloc_head; + hazard_d[e] = alloc_hazard; + end + // The scoreboard dependency for a node being promoted + // to list head is the scoreboard entry being allocated + // to the previous head of the list + make_head: begin + entry_d [e].dependency = replay_scoreboard_entry_i; + list_d [e].head = 1'b1; + end + link: begin + list_d [e].tail = 1'b0; + list_d [e].next = alloc_entry; + end + replay_gnt[e]: begin + valid_d [e] = 1'b0; + end + default: ; + endcase + + // Hazard clearing can happen concurrently + // to other events, thus it cannot be in the + // unique case statement above + if (clear_hazard) begin + hazard_d[e] = 1'b0; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + list_q [e] <= '0; + entry_q [e] <= '0; + valid_q [e] <= '0; + hazard_q[e] <= '0; + end else begin + list_q [e] <= list_d [e]; + entry_q [e] <= entry_d [e]; + valid_q [e] <= valid_d [e]; + hazard_q[e] <= hazard_d[e]; + end + end + end +// }}} -assign replay_full_o = 1'b1; +// Replay arbitration to snoop pipeline +// {{{ + rr_arb_tree #( + .NumIn (ccuCfg.u.numReplayEntries), + .DataType (ccu_ace_ar_t), + .ExtPrio (1'b0), + .AxiVldRdy (1'b1), + .LockIn (1'b1), + .FairArb (1'b1) + ) u_replay_arbiter ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .rr_i ('0), + .req_i (replay_req), + .gnt_o (replay_gnt), + .data_i (replay_ar), + .req_o (replay_ar_valid_o), + .gnt_i (replay_ar_ready_i), + .data_o (replay_ar_o), + .idx_o (replay_entry) + ); + assign replay_next_entry = list_q[replay_entry].next; + assign replay_is_tail = list_q[replay_entry].tail; +// }}} endmodule diff --git a/src/ccu/ccu_scoreboard.sv b/src/ccu/ccu_scoreboard.sv index 8449e06..413abe3 100644 --- a/src/ccu/ccu_scoreboard.sv +++ b/src/ccu/ccu_scoreboard.sv @@ -17,7 +17,7 @@ module ccu_scoreboard import ccu_pkg::*; #( parameter ccu_config_t ccuCfg = '{default: '0}, - localparam int unsigned 
scoreboardEntries = ccuCfg.u.numShareableTransactions, + localparam int unsigned numScoreboardEntries = ccuCfg.u.numShareableTransactions, localparam int unsigned scoreboardEntryIndexWidth = ccuCfg.transactionIndexWidth ) ( @@ -31,6 +31,8 @@ module ccu_scoreboard input logic [ccuCfg.u.axiAddressWidth-1:0] alloc_addr_i, input logic [ccuCfg.axiCcuIdWidth-1:0] alloc_id_i, output logic alloc_hit_o, + output logic [scoreboardEntryIndexWidth-1:0] alloc_hit_entry_o, + output logic [scoreboardEntryIndexWidth-1:0] alloc_entry_o, input logic dealloc_check_i, input logic [ccuCfg.axiCcuIdWidth-1:0] dealloc_id_i, @@ -38,84 +40,95 @@ module ccu_scoreboard output logic [scoreboardEntryIndexWidth-1:0] dealloc_hit_entry_o, input logic [ccuCfg.u.numSubordinates-1:0] dealloc_i, - input logic [ccuCfg.u.numSubordinates-1:0][scoreboardEntryIndexWidth-1:0] dealloc_entry_i + input logic [ccuCfg.u.numSubordinates-1:0][scoreboardEntryIndexWidth-1:0] dealloc_entry_i, + output logic [numScoreboardEntries-1:0] dealloc_o ); -typedef struct packed { - logic [ccuCfg.addressCheckWidth-1:0] addr; - logic [ccuCfg.axiCcuIdWidth-1:0] id; -} scoreboard_entry_t; - -logic [ccuCfg.addressCheckWidth-1:0] alloc_addr_slice; - -logic [scoreboardEntries-1:0] valid_q; -logic [scoreboardEntries-1:0] valid_d; -scoreboard_entry_t [scoreboardEntries-1:0] entry_q; -scoreboard_entry_t [scoreboardEntries-1:0] entry_d; -logic [scoreboardEntries-1:0] address_hit; -logic [scoreboardEntries-1:0] dealloc_id_hit; -logic [scoreboardEntryIndexWidth-1:0] alloc_entry; - -assign alloc_addr_slice = alloc_addr_i[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb]; - -assign alloc_hit_o = alloc_check_i && |(valid_q & address_hit); -assign dealloc_hit_o = dealloc_check_i && |(valid_q & dealloc_id_hit); - -always_comb begin : alloc_entry_comb - alloc_entry = '0; - for (int unsigned e = 0; e < scoreboardEntries; e++) begin - if (!valid_q[e]) begin - alloc_entry = e; - break; - end - end -end - -assign full_o = &valid_q; - -for (genvar 
e = 0; e < scoreboardEntries; e++) begin : gen_entry - logic [ccuCfg.subordinateIndexWidth-1:0] subordinate_index; - logic alloc; - logic dealloc; - assign subordinate_index = entry_q[e].id[ccuCfg.axiCcuIdWidth-1-:ccuCfg.subordinateIndexWidth]; - assign alloc = alloc_i && alloc_entry == e; - assign dealloc = dealloc_i[subordinate_index] && dealloc_entry_i[subordinate_index] == e; - assign address_hit[e] = alloc_addr_slice == entry_q[e].addr; - assign dealloc_id_hit[e] = dealloc_id_i == entry_q[e].id; - - always_comb begin : entry_comb - valid_d[e] = valid_q[e]; - entry_d[e] = entry_q[e]; - - unique case (1'b1) - alloc: begin - valid_d[e] = 1'b1; - entry_d[e].addr = alloc_addr_slice; - entry_d[e].id = alloc_id_i; - end - dealloc: begin - valid_d[e] = 1'b0; +// Shared signals +// {{{ + typedef struct packed { + logic [ccuCfg.addressCheckWidth-1:0] addr; + logic [ccuCfg.axiCcuIdWidth-1:0] id; + } scoreboard_entry_t; + + logic [ccuCfg.addressCheckWidth-1:0] alloc_addr_slice; + + logic [numScoreboardEntries-1:0] valid_q; + logic [numScoreboardEntries-1:0] valid_d; + scoreboard_entry_t [numScoreboardEntries-1:0] entry_q; + scoreboard_entry_t [numScoreboardEntries-1:0] entry_d; + logic [numScoreboardEntries-1:0] address_hit; + logic [numScoreboardEntries-1:0] dealloc_id_hit; + + assign alloc_addr_slice = alloc_addr_i[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb]; + + assign alloc_hit_o = alloc_check_i && |(valid_q & address_hit); + assign dealloc_hit_o = dealloc_check_i && |(valid_q & dealloc_id_hit); + + always_comb begin : alloc_entry_comb + alloc_entry_o = '0; + for (int unsigned e = 0; e < numScoreboardEntries; e++) begin + if (!valid_q[e]) begin + alloc_entry_o = e; + break; end - default: ; - endcase - end - - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - valid_q[e] <= 1'b0; - entry_q[e] <= '0; - end else begin - valid_q[e] <= valid_d[e]; - entry_q[e] <= entry_d[e]; end end -end -onehot_to_bin #( - .ONEHOT_WIDTH (scoreboardEntries) 
-) u_onehot_to_bin ( - .onehot (dealloc_id_hit & valid_q), - .bin (dealloc_hit_entry_o) -); + assign full_o = &valid_q; + + onehot_to_bin #( + .ONEHOT_WIDTH (numScoreboardEntries) + ) u_alloc_onehot_to_bin ( + .onehot (address_hit & valid_q), + .bin (alloc_hit_entry_o) + ); + + onehot_to_bin #( + .ONEHOT_WIDTH (numScoreboardEntries) + ) u_dealloc_onehot_to_bin ( + .onehot (dealloc_id_hit & valid_q), + .bin (dealloc_hit_entry_o) + ); +// }}} + +// Per-entry logic +// {{{ + for (genvar e = 0; e < numScoreboardEntries; e++) begin : gen_entry + logic [ccuCfg.subordinateIndexWidth-1:0] subordinate_index; + logic alloc; + assign subordinate_index = entry_q[e].id[ccuCfg.axiCcuIdWidth-1-:ccuCfg.subordinateIndexWidth]; + assign alloc = alloc_i && alloc_entry_o == e; + assign dealloc_o[e] = dealloc_i[subordinate_index] && dealloc_entry_i[subordinate_index] == e; + assign address_hit[e] = alloc_addr_slice == entry_q[e].addr; + assign dealloc_id_hit[e] = dealloc_id_i == entry_q[e].id; + + always_comb begin : entry_comb + valid_d[e] = valid_q[e]; + entry_d[e] = entry_q[e]; + + unique case (1'b1) + alloc: begin + valid_d[e] = 1'b1; + entry_d[e].addr = alloc_addr_slice; + entry_d[e].id = alloc_id_i; + end + dealloc_o[e]: begin + valid_d[e] = 1'b0; + end + default: ; + endcase + end + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + valid_q[e] <= 1'b0; + entry_q[e] <= '0; + end else begin + valid_q[e] <= valid_d[e]; + entry_q[e] <= entry_d[e]; + end + end + end +// }}} endmodule diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index bf18919..dbccfb9 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -44,7 +44,6 @@ module ccu_snoop_pipeline input logic scoreboard_full_i, output logic replay_alloc_o, - input logic replay_full_i, output logic [ccuCfg.u.numSubordinates-1:0] ac_valid_o, input logic [ccuCfg.u.numSubordinates-1:0] ac_ready_i, @@ -226,15 +225,20 @@ module ccu_snoop_pipeline }; assign 
scoreboard_alloc_check_o = !ar_is_read_no_snoop && ar_valid_i; - assign replay_alloc_o = !replay_full_i && scoreboard_alloc_hit_i; + assign replay_alloc_o = scoreboard_alloc_hit_i; always_comb begin : ar_stall_comb ar_fork_valid = ar_valid_i; ar_ready_o = ar_fork_ready; + // If a request arrives here, it means + // the replay table is not full since the check + // was performed upstream. + // We can safely allocate one entry as long + // as the replay table is actually instantiated. if (scoreboard_alloc_hit_i) begin ar_fork_valid = 1'b0; - ar_ready_o = !replay_full_i; + ar_ready_o = ccuCfg.u.enableReplay; end end diff --git a/src/ccu/ccu_top.sv b/src/ccu/ccu_top.sv index f39a20f..7db3111 100644 --- a/src/ccu/ccu_top.sv +++ b/src/ccu/ccu_top.sv @@ -73,20 +73,21 @@ module ccu_top // }}} localparam int unsigned scoreboardEntryIndexWidth = ccuCfg.transactionIndexWidth; +localparam int unsigned numScoreboardEntries = ccuCfg.u.numShareableTransactions; logic scoreboard_full; logic scoreboard_alloc_check; logic scoreboard_alloc; logic scoreboard_alloc_hit; +logic [scoreboardEntryIndexWidth-1:0] scoreboard_alloc_hit_entry; +logic [scoreboardEntryIndexWidth-1:0] scoreboard_alloc_entry; logic scoreboard_dealloc_check; logic [ccuCfg.axiCcuIdWidth-1:0] scoreboard_dealloc_id; logic scoreboard_dealloc_hit; logic [scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_hit_entry; logic [ccuCfg.u.numSubordinates-1:0] scoreboard_dealloc; logic [ccuCfg.u.numSubordinates-1:0][scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_entry; - -logic replay_alloc; -logic replay_full; +logic [numScoreboardEntries-1:0] scoreboard_dealloc_bitvector; logic [ccuCfg.u.numSubordinates-1:0] snoop_ac_valid; logic [ccuCfg.u.numSubordinates-1:0] snoop_ac_ready; @@ -116,6 +117,11 @@ logic read_engine_addr_check; logic read_engine_addr_hit; logic [ccuCfg.addressCheckWidth-1:0] read_engine_addr_slice; +logic replay_alloc; +logic ar_valid; +logic ar_ready; +ccu_ace_ar_t ar; + ccu_axi_manager_req_t manager_cut_req; 
ccu_axi_manager_resp_t manager_cut_resp; @@ -158,6 +164,74 @@ ccu_axi_manager_resp_t manager_cut_resp; ); // }}} +// Replay list + if (ccuCfg.u.enableReplay) begin : gen_replay + logic replay_ar_valid; + logic replay_ar_ready; + ccu_ace_ar_t replay_ar; + logic replay_full; + logic frontend_ar_valid; + logic frontend_ar_ready; + logic frontend_ar_is_read_no_snoop; + + ccu_replay #( + .ccuCfg (ccuCfg), + .ccu_ace_ar_t (ccu_ace_ar_t) + ) u_ccu_replay ( + .clk_i, + .rst_ni, + .alloc_i (replay_alloc), + .alloc_ar_i (frontend_req.ar), + .alloc_scoreboard_entry_i (scoreboard_alloc_hit_entry), + .replay_scoreboard_entry_i (scoreboard_alloc_entry), + .replay_ar_o (replay_ar), + .replay_ar_valid_o (replay_ar_valid), + .replay_ar_ready_i (replay_ar_ready), + .scoreboard_dealloc_i (scoreboard_dealloc_bitvector), + .full_o (replay_full) + ); + + // Fixed priority arbitration gives precedence + // to replayable requests + // Shareable frontend requests are stalled once the + // replay list is full + assign frontend_ar_is_read_no_snoop = ace_is_read_no_snoop( + frontend_req.ar.bar[0], + frontend_req.ar.domain, + frontend_req.ar.snoop + ); + + assign frontend_ar_valid = (!replay_full || frontend_ar_is_read_no_snoop) && frontend_req.ar_valid; + assign frontend_resp.ar_ready = (!replay_full || frontend_ar_is_read_no_snoop) && frontend_ar_ready; + + rr_arb_tree #( + .NumIn (2), + .DataType (ccu_ace_ar_t), + .ExtPrio (1'b1), + .AxiVldRdy (1'b1), + .LockIn (1'b0), + .FairArb (1'b1) + ) u_ccu_replay_arbiter ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .rr_i ('1), + .req_i ({replay_ar_valid, frontend_ar_valid}), + .gnt_o ({replay_ar_ready, frontend_ar_ready}), + .data_i ({replay_ar , frontend_req.ar}), + .req_o (ar_valid), + .gnt_i (ar_ready), + .data_o (ar), + .idx_o (replay) + ); + end else begin : gen_no_replay + assign replay = 1'b0; + assign ar_valid = frontend_req.ar_valid; + assign frontend_resp.ar_ready = ar_ready; + assign ar = frontend_req.ar; + end +// }}} + // AR-related snoop 
pipeline // {{{ ccu_snoop_pipeline #( @@ -175,15 +249,14 @@ ccu_axi_manager_resp_t manager_cut_resp; .clk_i, .rst_ni, .domain_map_i (domain_map_i), - .ar_i (frontend_req.ar), - .ar_valid_i (frontend_req.ar_valid), - .ar_ready_o (frontend_resp.ar_ready), + .ar_i (ar), + .ar_valid_i (ar_valid), + .ar_ready_o (ar_ready), .scoreboard_alloc_check_o (scoreboard_alloc_check), .scoreboard_alloc_o (scoreboard_alloc), .scoreboard_alloc_hit_i (scoreboard_alloc_hit), .scoreboard_full_i (scoreboard_full), .replay_alloc_o (replay_alloc), - .replay_full_i (replay_full), .ac_valid_o (snoop_ac_valid), .ac_ready_i (snoop_ac_ready), .ac_o (snoop_ac), @@ -231,28 +304,18 @@ ccu_axi_manager_resp_t manager_cut_resp; .full_o (scoreboard_full), .alloc_check_i (scoreboard_alloc_check), .alloc_i (scoreboard_alloc), - .alloc_addr_i (frontend_req.ar.addr), - .alloc_id_i (frontend_req.ar.id), + .alloc_addr_i (ar.addr), + .alloc_id_i (ar.id), .alloc_hit_o (scoreboard_alloc_hit), + .alloc_hit_entry_o (scoreboard_alloc_hit_entry), + .alloc_entry_o (scoreboard_alloc_entry), .dealloc_check_i (scoreboard_dealloc_check), .dealloc_id_i (scoreboard_dealloc_id), .dealloc_hit_o (scoreboard_dealloc_hit), .dealloc_hit_entry_o (scoreboard_dealloc_hit_entry), .dealloc_i (scoreboard_dealloc), - .dealloc_entry_i (scoreboard_dealloc_entry) - ); -// }}} - -// Replay list -// TODO: currently a stub, to be implemented -// {{{ - ccu_replay #( - .ccuCfg (ccuCfg) - ) u_ccu_replay ( - .clk_i, - .rst_ni, - .replay_alloc_i (replay_alloc), - .replay_full_o (replay_full) + .dealloc_entry_i (scoreboard_dealloc_entry), + .dealloc_o (scoreboard_dealloc_bitvector) ); // }}} From f50055a09882e28650b3344cb1fddf241dd3a2d6 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 19 Dec 2025 13:52:26 +0100 Subject: [PATCH 086/109] ccu_replay: fix hazard setting during allocation --- src/ccu/ccu_replay.sv | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ccu/ccu_replay.sv b/src/ccu/ccu_replay.sv index 
c535f4e..32d53c1 100644 --- a/src/ccu/ccu_replay.sv +++ b/src/ccu/ccu_replay.sv @@ -83,8 +83,9 @@ module ccu_replay end // Entries which are being allocated the same cycle the corresponding - // scoreboard entry is being deallocated can replay from the next cycle - assign alloc_hazard = !scoreboard_dealloc_i[alloc_scoreboard_entry_i]; + // scoreboard entry is being deallocated AND are list heads can replay + // from the next cycle + assign alloc_hazard = !alloc_head || !scoreboard_dealloc_i[alloc_scoreboard_entry_i]; assign alloc_head = ~|(address_hit & valid_q); assign alloc_addr_slice = alloc_ar_i.addr[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb]; // }}} From c26967e09633f8ca9f598dd5cd9bdeb8070a33fb Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 19 Dec 2025 15:51:06 +0100 Subject: [PATCH 087/109] ccu_frontend: add few explanation comments --- src/ccu/ccu_frontend.sv | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/ccu/ccu_frontend.sv b/src/ccu/ccu_frontend.sv index f72f6ae..7511fc9 100644 --- a/src/ccu/ccu_frontend.sv +++ b/src/ccu/ccu_frontend.sv @@ -118,6 +118,7 @@ module ccu_frontend exclusive_monitor_r_valid[s] = subordinate_resp[s].r_valid; subordinate_req[s].r_ready = exclusive_monitor_r_ready[s]; + // Stall R responses once the RACK fifo is full if (rack_fifo_full) begin exclusive_monitor_r_valid[s] = 1'b0; subordinate_req[s].r_ready = 1'b0; @@ -155,6 +156,11 @@ module ccu_frontend assign rack_fifo_push = subordinate_resp_o[s].r_valid && subordinate_req_i[s].r_ready && subordinate_resp_o[s].r.last; + // RACK-related metadata are used to: + // - clear the corresponding scoreboard entry + // - clear the corresponding exclusive monitor entry + // SC failure responses are locally generated, thus no entry should be cleared + // once the RACK arrives assign rack_fifo_wdata = '{ tid: scoreboard_dealloc_entry_i, dealloc: scoreboard_dealloc_hit_i && !exclusive_monitor_sc_fail[s], From d35b4c0eede38881bba7d4fc4e2d725fc5284b70 Mon Sep 17 
00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 20 Feb 2026 13:23:04 +0100 Subject: [PATCH 088/109] ccu_replay: logic bugfixes --- src/ccu/ccu_replay.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ccu/ccu_replay.sv b/src/ccu/ccu_replay.sv index 32d53c1..6c42981 100644 --- a/src/ccu/ccu_replay.sv +++ b/src/ccu/ccu_replay.sv @@ -86,7 +86,7 @@ module ccu_replay // scoreboard entry is being deallocated AND are list heads can replay // from the next cycle assign alloc_hazard = !alloc_head || !scoreboard_dealloc_i[alloc_scoreboard_entry_i]; - assign alloc_head = ~|(address_hit & valid_q); + assign alloc_head = ~|(address_hit & valid_q & ~replay_gnt); assign alloc_addr_slice = alloc_ar_i.addr[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb]; // }}} @@ -105,7 +105,7 @@ module ccu_replay assign alloc = alloc_entry == e && alloc_i; assign make_head = replay_next_entry == e && |replay_gnt && !replay_is_tail; - assign link = valid_q[e] && list_q[e].tail && address_hit[e] && alloc_i; + assign link = valid_q[e] && list_q[e].tail && address_hit[e] && !replay_gnt[e] && alloc_i; assign clear_hazard = valid_q[e] && list_q[e].head && scoreboard_dealloc_i[entry_q[e].dependency]; assign replay_req[e] = valid_q[e] && list_q[e].head && !hazard_q[e]; From fdfc3699c6981753393a5727d7851874975ee2a4 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 20 Feb 2026 13:24:25 +0100 Subject: [PATCH 089/109] ccu_snoop_pipeline: fix comment typo and code alignment --- src/ccu/ccu_snoop_pipeline.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index dbccfb9..a5b023a 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -157,7 +157,7 @@ module ccu_snoop_pipeline end // }}} -// CR channel +// CD channel // {{{ logic [ccuCfg.u.numSubordinates-1:0] cd_fifo_valid; logic [ccuCfg.u.numSubordinates-1:0] cd_fifo_ready; @@ -262,8 +262,8 @@ module 
ccu_snoop_pipeline // Stage 1 // {{{ typedef struct packed { - ccu_ace_ar_t ar; - logic [ccuCfg.u.numSubordinates-1:0] sel; + ccu_ace_ar_t ar; + logic [ccuCfg.u.numSubordinates-1:0] sel; } stage1_fifo_entry_t; logic stage1_fifo_valid; From 1178f0d1ce874bd5e60bfcdb67c5da3b92702900 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 20 Feb 2026 17:18:52 +0100 Subject: [PATCH 090/109] ccu: add CSRs --- Bender.yml | 11 +- src/ccu/ccu_csr_wrap.sv | 73 +++++++ src/ccu/ccu_pkg.sv | 58 ++++-- src/ccu/ccu_snoop_pipeline.sv | 6 +- src/ccu/ccu_top.sv | 66 +++++- src/ccu/regs/ccu_csr.rdl | 57 ++++++ src/ccu/regs/generated/ccu_csr.sv | 284 ++++++++++++++++++++++++++ src/ccu/regs/generated/ccu_csr_pkg.sv | 50 +++++ 8 files changed, 574 insertions(+), 31 deletions(-) create mode 100644 src/ccu/ccu_csr_wrap.sv create mode 100644 src/ccu/regs/ccu_csr.rdl create mode 100644 src/ccu/regs/generated/ccu_csr.sv create mode 100644 src/ccu/regs/generated/ccu_csr_pkg.sv diff --git a/Bender.yml b/Bender.yml index 4bff8bb..ae5e0a6 100644 --- a/Bender.yml +++ b/Bender.yml @@ -6,9 +6,10 @@ package: - "Riccardo Tedeschi " dependencies: - axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.9 } - common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.39.0 } - axi_riscv_atomics: { git: "https://github.com/pulp-platform/axi_riscv_atomics.git", rev: 6d3c8b4} # branch: master + axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.9 } + common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.39.0 } + axi_riscv_atomics: { git: "https://github.com/pulp-platform/axi_riscv_atomics.git", rev: 6d3c8b4 } # branch: master + apb: { git: "https://github.com/pulp-platform/apb.git", version: 0.2.4 } export_include_dirs: - include @@ -20,7 +21,11 @@ sources: - src/snoop_intf.sv # CCU package - src/ccu/ccu_pkg.sv + # CCU generated regs + - src/ccu/regs/generated/ccu_csr_pkg.sv + - src/ccu/regs/generated/ccu_csr.sv 
# CCU source files + - src/ccu/ccu_csr_wrap.sv - src/ccu/ccu_exclusive_monitor.sv - src/ccu/ccu_frontend.sv - src/ccu/ccu_read_engine.sv diff --git a/src/ccu/ccu_csr_wrap.sv b/src/ccu/ccu_csr_wrap.sv new file mode 100644 index 0000000..136726c --- /dev/null +++ b/src/ccu/ccu_csr_wrap.sv @@ -0,0 +1,73 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Riccardo Tedeschi + +module ccu_csr_wrap + import ccu_pkg::*; + import ccu_csr_pkg::*; +#( + parameter ccu_config_t ccuCfg = '{default: '0}, + parameter type apb_req_t = logic, + parameter type apb_resp_t = logic, + parameter int unsigned numEvents = 16 +) ( + input logic clk_i, + input logic rst_ni, + // APB interface + input apb_req_t apb_req_i, + output apb_resp_t apb_resp_o, + // Performance events + input logic [numEvents-1:0] events_i +); + + localparam int unsigned numPerfCounters = 16; + + ccu_csr__in_t hwif_in; + ccu_csr__out_t hwif_out; + + for (genvar i = 0; i < numPerfCounters; i++) begin : gen_hwif + assign hwif_in.perf_counter[i].val.incr = &{ + hwif_out.perf_eventsel[i].event_id.value < numEvents, + !hwif_out.perf_countinhibit.inh.value[i], + events_i[hwif_out.perf_eventsel[i].event_id.value] + }; + end + + ccu_csr u_csr_regs ( + .clk (clk_i), + .arst_n (rst_ni), + .s_apb_psel (apb_req_i.psel), + .s_apb_penable (apb_req_i.penable), + .s_apb_pwrite 
(apb_req_i.pwrite), + .s_apb_pprot (apb_req_i.pprot), + .s_apb_paddr (apb_req_i.paddr[CCU_CSR_MIN_ADDR_WIDTH-1:0]), + .s_apb_pwdata (apb_req_i.pwdata), + .s_apb_pstrb (apb_req_i.pstrb), + .s_apb_pready (apb_resp_o.pready), + .s_apb_prdata (apb_resp_o.prdata), + .s_apb_pslverr (apb_resp_o.pslverr), + + .hwif_in (hwif_in), + .hwif_out (hwif_out) + ); + + // pragma translate_off + `ifndef VERILATOR + initial begin + tooManyPerformanceEvents: assert (numEvents <= 256) + else $fatal(1, "Number of events exceeds 256!"); // event_id is 8 bits wide; $fatal takes a finish_number before the message + end + `endif + // pragma translate_on + +endmodule diff --git a/src/ccu/ccu_pkg.sv b/src/ccu/ccu_pkg.sv index 617cc0e..0258a5c 100644 --- a/src/ccu/ccu_pkg.sv +++ b/src/ccu/ccu_pkg.sv @@ -14,30 +14,44 @@ package ccu_pkg; + // Available memory mapped IO interfaces + typedef enum { + CCU_MMIO_APB, + CCU_MMIO_REGBUS + } ccu_mmio_intf_e; + typedef struct packed { // Number of subordinate ports (i.e. coherent managers) - int unsigned numSubordinates; + int unsigned numSubordinates; // Number of shareable simultaneous inflight transactions - int unsigned numShareableTransactions; + int unsigned numShareableTransactions; // Number of simultaneous write transactions - int unsigned numWriteTransactions; + int unsigned numWriteTransactions; // Number of simultaneous inflight snoop transactions - int unsigned numSnoopTransactions; + int unsigned numSnoopTransactions; // Enable replay of conflicting requests - bit enableReplay; + bit enableReplay; // Number of replay list entries - int unsigned numReplayEntries; + int unsigned numReplayEntries; // AXI/ACE parameters - int unsigned axiAddressWidth; - int unsigned axiDataWidth; - int unsigned axiUserWidth; - int unsigned axiSubordinateIdWidth; + int unsigned axiAddressWidth; + int unsigned axiDataWidth; + int unsigned axiUserWidth; + int unsigned axiSubordinateIdWidth; // Cache parameters - int unsigned cachelineWidth; + int unsigned cachelineWidth; // LSB address bit used for hazard checks (inclusive) - int unsigned 
addressCheckLsb; + int unsigned addressCheckLsb; // MSB address bit used for hazard checks (inclusive) - int unsigned addressCheckMsb; + int unsigned addressCheckMsb; + // Make snoop request FIFOs fall through + bit snoopReqFifoFallthrough; + // Make snoop response FIFOs fall through + bit snoopRespFifoFallthrough; + // Protocol used to access the memory mapped registers + ccu_mmio_intf_e mmioIntf; + // Instantiate CCU control and status registers + bit enableCSRs; } ccu_user_config_t; typedef struct packed { @@ -90,15 +104,15 @@ package ccu_pkg; // Performance events typedef struct packed { - logic stage0_stall; - logic stage0_stall_scoreboard_hit; - logic stage0_stall_ac_fifo_full; - logic stage0_stall_stage1_fifo_full; - logic stage1_stall; - logic stage1_stall_cr_not_valid; - logic stage1_stall_write_engine_busy; - logic stage1_stall_read_engine_busy; - logic stage1_stall_cd_engine_busy; + logic stage0_stall; // 8 + logic stage0_stall_scoreboard_hit; // 7 + logic stage0_stall_ac_fifo_full; // 6 + logic stage0_stall_stage1_fifo_full; // 5 + logic stage1_stall; // 4 + logic stage1_stall_cr_not_valid; // 3 + logic stage1_stall_write_engine_busy; // 2 + logic stage1_stall_read_engine_busy; // 1 + logic stage1_stall_cd_engine_busy; // 0 } ccu_snoop_pipeline_events_t; endpackage diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index a5b023a..fa5a06f 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -96,7 +96,7 @@ module ccu_snoop_pipeline }; stream_fifo #( - .FALL_THROUGH (1'b0), + .FALL_THROUGH (ccuCfg.u.snoopReqFifoFallthrough), .DEPTH (ccuCfg.u.numSnoopTransactions), .T (ac_fifo_entry_t) ) u_ac_fifo ( @@ -138,7 +138,7 @@ module ccu_snoop_pipeline for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_cr_fifo stream_fifo #( - .FALL_THROUGH (1'b0), + .FALL_THROUGH (ccuCfg.u.snoopRespFifoFallthrough), .DEPTH (ccuCfg.u.numSnoopTransactions), .T (ccu_snoop_cr_t) ) u_cr_fifo ( @@ -165,7 +165,7 @@ 
module ccu_snoop_pipeline for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_cd_fifo stream_fifo #( - .FALL_THROUGH (1'b0), + .FALL_THROUGH (ccuCfg.u.snoopRespFifoFallthrough), .DEPTH (ccuCfg.u.numSnoopTransactions), .T (ccu_snoop_cd_t) ) u_cd_fifo ( diff --git a/src/ccu/ccu_top.sv b/src/ccu/ccu_top.sv index 7db3111..4e2e114 100644 --- a/src/ccu/ccu_top.sv +++ b/src/ccu/ccu_top.sv @@ -16,6 +16,7 @@ `include "ace/typedef.svh" `include "axi/assign.svh" `include "ace/assign.svh" +`include "apb/typedef.svh" module ccu_top import ace_pkg::*; @@ -40,7 +41,9 @@ module ccu_top parameter type ccu_snoop_cr_t = logic, parameter type ccu_snoop_cd_t = logic, parameter type ccu_snoop_req_t = logic, - parameter type ccu_snoop_resp_t = logic + parameter type ccu_snoop_resp_t = logic, + parameter type mmio_req_t = logic, + parameter type mmio_resp_t = logic ) ( input logic clk_i, input logic rst_ni, @@ -53,7 +56,10 @@ module ccu_top output ccu_snoop_req_t [ccuCfg.u.numSubordinates-1:0] snoop_req_o, input ccu_snoop_resp_t [ccuCfg.u.numSubordinates-1:0] snoop_resp_i, output ccu_axi_manager_req_t manager_req_o, - input ccu_axi_manager_resp_t manager_resp_i + input ccu_axi_manager_resp_t manager_resp_i, + + input mmio_req_t mmio_subordinate_req_i, + output mmio_resp_t mmio_subordinate_resp_o ); // AXI/ACE typedefs @@ -125,6 +131,8 @@ ccu_ace_ar_t ar; ccu_axi_manager_req_t manager_cut_req; ccu_axi_manager_resp_t manager_cut_resp; +ccu_snoop_pipeline_events_t perf_events; + // Frontend // {{{ // The frontend acts as the Point of Serialization (PoS) @@ -278,7 +286,7 @@ ccu_axi_manager_resp_t manager_cut_resp; .read_engine_r_valid_o (snoop_read_engine_r_valid), .read_engine_r_ready_i (snoop_read_engine_r_ready), .read_engine_r_o (snoop_read_engine_r), - .events_o (/* unused */) + .events_o (perf_events) ); for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_snoop_assignments @@ -423,4 +431,56 @@ ccu_axi_manager_resp_t manager_cut_resp; .mst_resp_i 
(manager_resp_i) ); // }}} + +// Control and status registers +// {{{ + typedef logic [$bits(mmio_subordinate_req_i.addr)-1:0] addr_t; + typedef logic [31:0] data_t; + typedef logic [3:0] strb_t; + + `APB_TYPEDEF_REQ_T(apb_req_t, addr_t, data_t, strb_t) + `APB_TYPEDEF_RESP_T(apb_resp_t, data_t) + + apb_req_t apb_req; + apb_resp_t apb_resp; + + if (ccuCfg.u.mmioIntf == CCU_MMIO_REGBUS) begin : gen_reg_to_apb + reg_to_apb #( + .reg_req_t (mmio_req_t), + .reg_rsp_t (mmio_resp_t), + .apb_req_t (apb_req_t), + .apb_rsp_t (apb_resp_t) + ) u_reg_to_apb ( + .clk_i, + .rst_ni, + .reg_req_i (mmio_subordinate_req_i), + .reg_rsp_o (mmio_subordinate_resp_o), + .apb_req_o (apb_req), + .apb_rsp_i (apb_resp) + ); + end else if (ccuCfg.u.mmioIntf == CCU_MMIO_APB) begin : gen_apb_passthrough + assign apb_req = mmio_subordinate_req_i; + assign mmio_subordinate_resp_o = apb_resp; + end + + if (ccuCfg.u.enableCSRs) begin : gen_csr + ccu_csr_wrap #( + .ccuCfg (ccuCfg), + .apb_req_t (apb_req_t), + .apb_resp_t (apb_resp_t), + .numEvents ($bits(ccu_snoop_pipeline_events_t)) + ) u_perf_counters ( + .clk_i, + .rst_ni, + .apb_req_i (apb_req), + .apb_resp_o (apb_resp), + .events_i (perf_events) + ); + end else begin + always_comb begin : apb_sink_tieoff + apb_resp = '0; + apb_resp.pready = 1'b1; + end + end +// }}} endmodule diff --git a/src/ccu/regs/ccu_csr.rdl b/src/ccu/regs/ccu_csr.rdl new file mode 100644 index 0000000..b85307a --- /dev/null +++ b/src/ccu/regs/ccu_csr.rdl @@ -0,0 +1,57 @@ +addrmap ccu_csr #( + longint unsigned numPerfCounters = 16 +) { + name = "CCU control and status registers"; + + reg perf_eventsel_r { + name = "Performance Event Selector"; + desc = "Selects the microarchitectural event to monitor."; + regwidth = 32; + + field { + name = "Event Code"; + desc = "Hardware event ID."; + sw = rw; + hw = r; + fieldwidth = 8; + } event_id = 0; + }; + + reg perf_counter_r { + name = "Performance Counter"; + desc = "32-bit counter value incremented by the hardware."; + regwidth 
= 32; + + field { + name = "Count"; + sw = rw; + hw = r; + counter; + fieldwidth = 32; + } val = 0; + }; + + reg perf_countinhibit_r { + name = "Counter Inhibit"; + desc = "Bit [i] = 1 means counter [i] is STOPPED. Bit [i] = 0 lets it run."; + regwidth = 32; + + field { + name = "Inhibit Bitmask"; + sw = rw; + hw = r; + fieldwidth = numPerfCounters; + } inh = (1 << numPerfCounters) - 1; + }; + + // --- Memory Map Instantiation --- + + // Global Control @ 0x00 + perf_countinhibit_r perf_countinhibit @ 0x00; + + // Array of Event Selectors @ 0x40 + perf_eventsel_r perf_eventsel[numPerfCounters] @ 0x40 += 0x4; + + // Array of 32-bit Counters @ 0xC0 + perf_counter_r perf_counter[numPerfCounters] @ 0xC0 += 0x4; +}; diff --git a/src/ccu/regs/generated/ccu_csr.sv b/src/ccu/regs/generated/ccu_csr.sv new file mode 100644 index 0000000..e96e468 --- /dev/null +++ b/src/ccu/regs/generated/ccu_csr.sv @@ -0,0 +1,284 @@ +// Generated by PeakRDL-regblock - A free and open-source SystemVerilog generator +// https://github.com/SystemRDL/PeakRDL-regblock + +module ccu_csr ( + input wire clk, + input wire arst_n, + + input wire s_apb_psel, + input wire s_apb_penable, + input wire s_apb_pwrite, + input wire [2:0] s_apb_pprot, + input wire [7:0] s_apb_paddr, + input wire [31:0] s_apb_pwdata, + input wire [3:0] s_apb_pstrb, + output logic s_apb_pready, + output logic [31:0] s_apb_prdata, + output logic s_apb_pslverr, + + input ccu_csr_pkg::ccu_csr__in_t hwif_in, + output ccu_csr_pkg::ccu_csr__out_t hwif_out + ); + + //-------------------------------------------------------------------------- + // CPU Bus interface logic + //-------------------------------------------------------------------------- + logic cpuif_req; + logic cpuif_req_is_wr; + logic [7:0] cpuif_addr; + logic [31:0] cpuif_wr_data; + logic [31:0] cpuif_wr_biten; + logic cpuif_req_stall_wr; + logic cpuif_req_stall_rd; + + logic cpuif_rd_ack; + logic cpuif_rd_err; + logic [31:0] cpuif_rd_data; + + logic cpuif_wr_ack; + logic 
cpuif_wr_err; + + // Request + logic is_active; + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + is_active <= '0; + cpuif_req <= '0; + cpuif_req_is_wr <= '0; + cpuif_addr <= '0; + cpuif_wr_data <= '0; + cpuif_wr_biten <= '0; + end else begin + if(~is_active) begin + if(s_apb_psel) begin + is_active <= '1; + cpuif_req <= '1; + cpuif_req_is_wr <= s_apb_pwrite; + cpuif_addr <= {s_apb_paddr[7:2], 2'b0}; + cpuif_wr_data <= s_apb_pwdata; + for(int i=0; i<4; i++) begin + cpuif_wr_biten[i*8 +: 8] <= {8{s_apb_pstrb[i]}}; + end + end + end else begin + cpuif_req <= '0; + if(cpuif_rd_ack || cpuif_wr_ack) begin + is_active <= '0; + end + end + end + end + + // Response + assign s_apb_pready = cpuif_rd_ack | cpuif_wr_ack; + assign s_apb_prdata = cpuif_rd_data; + assign s_apb_pslverr = cpuif_rd_err | cpuif_wr_err; + + logic cpuif_req_masked; + + // Read & write latencies are balanced. Stalls not required + assign cpuif_req_stall_rd = '0; + assign cpuif_req_stall_wr = '0; + assign cpuif_req_masked = cpuif_req + & !(!cpuif_req_is_wr & cpuif_req_stall_rd) + & !(cpuif_req_is_wr & cpuif_req_stall_wr); + + //-------------------------------------------------------------------------- + // Address Decode + //-------------------------------------------------------------------------- + typedef struct { + logic perf_countinhibit; + logic perf_eventsel[16]; + logic perf_counter[16]; + } decoded_reg_strb_t; + decoded_reg_strb_t decoded_reg_strb; + logic decoded_req; + logic decoded_req_is_wr; + logic [31:0] decoded_wr_data; + logic [31:0] decoded_wr_biten; + + always_comb begin + decoded_reg_strb.perf_countinhibit = cpuif_req_masked & (cpuif_addr == 8'h0); + for(int i0=0; i0<16; i0++) begin + decoded_reg_strb.perf_eventsel[i0] = cpuif_req_masked & (cpuif_addr == 8'h40 + (8)'(i0) * 8'h4); + end + for(int i0=0; i0<16; i0++) begin + decoded_reg_strb.perf_counter[i0] = cpuif_req_masked & (cpuif_addr == 8'hc0 + (8)'(i0) * 8'h4); + end + end + + // Pass down signals to next 
stage + assign decoded_req = cpuif_req_masked; + assign decoded_req_is_wr = cpuif_req_is_wr; + assign decoded_wr_data = cpuif_wr_data; + assign decoded_wr_biten = cpuif_wr_biten; + + //-------------------------------------------------------------------------- + // Field logic + //-------------------------------------------------------------------------- + typedef struct { + struct { + struct { + logic [15:0] next; + logic load_next; + } inh; + } perf_countinhibit; + struct { + struct { + logic [7:0] next; + logic load_next; + } event_id; + } perf_eventsel[16]; + struct { + struct { + logic [31:0] next; + logic load_next; + logic incrthreshold; + logic overflow; + } val; + } perf_counter[16]; + } field_combo_t; + field_combo_t field_combo; + + typedef struct { + struct { + struct { + logic [15:0] value; + } inh; + } perf_countinhibit; + struct { + struct { + logic [7:0] value; + } event_id; + } perf_eventsel[16]; + struct { + struct { + logic [31:0] value; + } val; + } perf_counter[16]; + } field_storage_t; + field_storage_t field_storage; + + // Field: ccu_csr.perf_countinhibit.inh + always_comb begin + automatic logic [15:0] next_c; + automatic logic load_next_c; + next_c = field_storage.perf_countinhibit.inh.value; + load_next_c = '0; + if(decoded_reg_strb.perf_countinhibit && decoded_req_is_wr) begin // SW write + next_c = (field_storage.perf_countinhibit.inh.value & ~decoded_wr_biten[15:0]) | (decoded_wr_data[15:0] & decoded_wr_biten[15:0]); + load_next_c = '1; + end + field_combo.perf_countinhibit.inh.next = next_c; + field_combo.perf_countinhibit.inh.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.perf_countinhibit.inh.value <= 16'hffff; + end else begin + if(field_combo.perf_countinhibit.inh.load_next) begin + field_storage.perf_countinhibit.inh.value <= field_combo.perf_countinhibit.inh.next; + end + end + end + assign hwif_out.perf_countinhibit.inh.value = 
field_storage.perf_countinhibit.inh.value; + for(genvar i0=0; i0<16; i0++) begin + // Field: ccu_csr.perf_eventsel[].event_id + always_comb begin + automatic logic [7:0] next_c; + automatic logic load_next_c; + next_c = field_storage.perf_eventsel[i0].event_id.value; + load_next_c = '0; + if(decoded_reg_strb.perf_eventsel[i0] && decoded_req_is_wr) begin // SW write + next_c = (field_storage.perf_eventsel[i0].event_id.value & ~decoded_wr_biten[7:0]) | (decoded_wr_data[7:0] & decoded_wr_biten[7:0]); + load_next_c = '1; + end + field_combo.perf_eventsel[i0].event_id.next = next_c; + field_combo.perf_eventsel[i0].event_id.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.perf_eventsel[i0].event_id.value <= 8'h0; + end else begin + if(field_combo.perf_eventsel[i0].event_id.load_next) begin + field_storage.perf_eventsel[i0].event_id.value <= field_combo.perf_eventsel[i0].event_id.next; + end + end + end + assign hwif_out.perf_eventsel[i0].event_id.value = field_storage.perf_eventsel[i0].event_id.value; + end + for(genvar i0=0; i0<16; i0++) begin + // Field: ccu_csr.perf_counter[].val + always_comb begin + automatic logic [31:0] next_c; + automatic logic load_next_c; + next_c = field_storage.perf_counter[i0].val.value; + load_next_c = '0; + if(decoded_reg_strb.perf_counter[i0] && decoded_req_is_wr) begin // SW write + next_c = (field_storage.perf_counter[i0].val.value & ~decoded_wr_biten[31:0]) | (decoded_wr_data[31:0] & decoded_wr_biten[31:0]); + load_next_c = '1; + end + if(hwif_in.perf_counter[i0].val.incr) begin // increment + field_combo.perf_counter[i0].val.overflow = (((33)'(next_c) + 32'h1) > 32'hffffffff); + next_c = next_c + 32'h1; + load_next_c = '1; + end else begin + field_combo.perf_counter[i0].val.overflow = '0; + end + field_combo.perf_counter[i0].val.incrthreshold = (field_storage.perf_counter[i0].val.value >= 32'hffffffff); + field_combo.perf_counter[i0].val.next = next_c; + 
field_combo.perf_counter[i0].val.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.perf_counter[i0].val.value <= 32'h0; + end else begin + if(field_combo.perf_counter[i0].val.load_next) begin + field_storage.perf_counter[i0].val.value <= field_combo.perf_counter[i0].val.next; + end + end + end + assign hwif_out.perf_counter[i0].val.value = field_storage.perf_counter[i0].val.value; + end + + //-------------------------------------------------------------------------- + // Write response + //-------------------------------------------------------------------------- + assign cpuif_wr_ack = decoded_req & decoded_req_is_wr; + // Writes are always granted with no error response + assign cpuif_wr_err = '0; + + //-------------------------------------------------------------------------- + // Readback + //-------------------------------------------------------------------------- + + logic readback_err; + logic readback_done; + logic [31:0] readback_data; + + // Assign readback values to a flattened array + logic [31:0] readback_array[33]; + assign readback_array[0][15:0] = (decoded_reg_strb.perf_countinhibit && !decoded_req_is_wr) ? field_storage.perf_countinhibit.inh.value : '0; + assign readback_array[0][31:16] = '0; + for(genvar i0=0; i0<16; i0++) begin + assign readback_array[i0 * 1 + 1][7:0] = (decoded_reg_strb.perf_eventsel[i0] && !decoded_req_is_wr) ? field_storage.perf_eventsel[i0].event_id.value : '0; + assign readback_array[i0 * 1 + 1][31:8] = '0; + end + for(genvar i0=0; i0<16; i0++) begin + assign readback_array[i0 * 1 + 17][31:0] = (decoded_reg_strb.perf_counter[i0] && !decoded_req_is_wr) ? 
field_storage.perf_counter[i0].val.value : '0; + end + + // Reduce the array + always_comb begin + automatic logic [31:0] readback_data_var; + readback_done = decoded_req & ~decoded_req_is_wr; + readback_err = '0; + readback_data_var = '0; + for(int i=0; i<33; i++) readback_data_var |= readback_array[i]; + readback_data = readback_data_var; + end + + assign cpuif_rd_ack = readback_done; + assign cpuif_rd_data = readback_data; + assign cpuif_rd_err = readback_err; +endmodule diff --git a/src/ccu/regs/generated/ccu_csr_pkg.sv b/src/ccu/regs/generated/ccu_csr_pkg.sv new file mode 100644 index 0000000..c0967b0 --- /dev/null +++ b/src/ccu/regs/generated/ccu_csr_pkg.sv @@ -0,0 +1,50 @@ +// Generated by PeakRDL-regblock - A free and open-source SystemVerilog generator +// https://github.com/SystemRDL/PeakRDL-regblock + +package ccu_csr_pkg; + + localparam CCU_CSR_DATA_WIDTH = 32; + localparam CCU_CSR_MIN_ADDR_WIDTH = 8; + + typedef struct { + logic incr; + } ccu_csr__perf_counter_r__val__in_t; + + typedef struct { + ccu_csr__perf_counter_r__val__in_t val; + } ccu_csr__perf_counter_r__in_t; + + typedef struct { + ccu_csr__perf_counter_r__in_t perf_counter[16]; + } ccu_csr__in_t; + + typedef struct { + logic [15:0] value; + } ccu_csr__perf_countinhibit_r__inh__out_t; + + typedef struct { + ccu_csr__perf_countinhibit_r__inh__out_t inh; + } ccu_csr__perf_countinhibit_r__out_t; + + typedef struct { + logic [7:0] value; + } ccu_csr__perf_eventsel_r__event_id__out_t; + + typedef struct { + ccu_csr__perf_eventsel_r__event_id__out_t event_id; + } ccu_csr__perf_eventsel_r__out_t; + + typedef struct { + logic [31:0] value; + } ccu_csr__perf_counter_r__val__out_t; + + typedef struct { + ccu_csr__perf_counter_r__val__out_t val; + } ccu_csr__perf_counter_r__out_t; + + typedef struct { + ccu_csr__perf_countinhibit_r__out_t perf_countinhibit; + ccu_csr__perf_eventsel_r__out_t perf_eventsel[16]; + ccu_csr__perf_counter_r__out_t perf_counter[16]; + } ccu_csr__out_t; +endpackage From 
1c7ea5d2b7df7d7c5bf6bc8094342fd7b98194c2 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 20 Feb 2026 17:19:54 +0100 Subject: [PATCH 091/109] ccu_snoop_pipeline: avoid hardcoded values --- src/ccu/ccu_snoop_pipeline.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index fa5a06f..1e40efb 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -580,9 +580,9 @@ module ccu_snoop_pipeline read_engine_r_resp[RESP_IS_DIRTY] = cd_engine_fifo_rdata.resp_dirty; read_engine_r_resp[RESP_IS_SHARED] = cd_engine_fifo_rdata.resp_shared; if (cd_engine_fifo_rdata.ar_lock) - read_engine_r_resp[1:0] = axi_pkg::RESP_EXOKAY; + read_engine_r_resp[axi_pkg::RespWidth-1:0] = axi_pkg::RESP_EXOKAY; else - read_engine_r_resp[1:0] = axi_pkg::RESP_OKAY; + read_engine_r_resp[axi_pkg::RespWidth-1:0] = axi_pkg::RESP_OKAY; end // Only the `r_last` field has to be multiplexed From 85140baf07eb14428955f2f170b85cb14d51eda1 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Fri, 20 Feb 2026 17:27:54 +0100 Subject: [PATCH 092/109] ccu_top: fix instance name --- src/ccu/ccu_top.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu/ccu_top.sv b/src/ccu/ccu_top.sv index 4e2e114..ffe8c51 100644 --- a/src/ccu/ccu_top.sv +++ b/src/ccu/ccu_top.sv @@ -469,7 +469,7 @@ ccu_snoop_pipeline_events_t perf_events; .apb_req_t (apb_req_t), .apb_resp_t (apb_resp_t), .numEvents ($bits(ccu_snoop_pipeline_events_t)) - ) u_perf_counters ( + ) u_ccu_csrs ( .clk_i, .rst_ni, .apb_req_i (apb_req), From 0be22a2600a3cb32ca2f841283317e63a3c7a434 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 9 Mar 2026 14:04:36 +0100 Subject: [PATCH 093/109] pkg: refactor transaction decoding functions --- src/ace_pkg.sv | 130 +++++++++++-------------------- src/ccu/ccu_exclusive_monitor.sv | 25 +++--- 2 files changed, 58 insertions(+), 97 deletions(-) diff --git a/src/ace_pkg.sv 
b/src/ace_pkg.sv index 6f62a9d..508e099 100644 --- a/src/ace_pkg.sv +++ b/src/ace_pkg.sv @@ -268,122 +268,86 @@ package ace_pkg; // Transaction groups function automatic logic ace_aw_is_shareable(logic awbar0, axdomain_t awdomain, - awsnoop_t awsnoop); - logic retval; - unique case (1'b1) - ace_is_write_unique(awbar0, awdomain, awsnoop): retval = 1'b1; - ace_is_write_line_unique(awbar0, awdomain, awsnoop): retval = 1'b1; - default: retval = 1'b0; - endcase - return retval; + awsnoop_t awsnoop); + return ace_is_write_unique(awbar0, awdomain, awsnoop) || + ace_is_write_line_unique(awbar0, awdomain, awsnoop); endfunction function automatic logic ace_aw_is_memory_update(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); - logic retval; - unique case (1'b1) - ace_is_write_clean(awbar0, awdomain, awsnoop): retval = 1'b1; - ace_is_write_back(awbar0, awdomain, awsnoop): retval = 1'b1; - ace_is_evict(awbar0, awdomain, awsnoop): retval = 1'b1; - ace_is_write_evict(awbar0, awdomain, awsnoop): retval = 1'b1; - default: retval = 1'b0; - endcase - return retval; + return ace_is_write_clean(awbar0, awdomain, awsnoop) || + ace_is_write_back(awbar0, awdomain, awsnoop) || + ace_is_evict(awbar0, awdomain, awsnoop) || + ace_is_write_evict(awbar0, awdomain, awsnoop); endfunction function automatic logic ace_aw_is_non_blocking(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); - logic retval; - unique case (1'b1) - ace_aw_is_memory_update(awbar0, awdomain, awsnoop): retval = 1'b1; - ace_is_write_no_snoop(awbar0, awdomain, awsnoop): retval = 1'b1; - default: retval = 1'b0; - endcase - return retval; + return ace_aw_is_memory_update(awbar0, awdomain, awsnoop) || + ace_is_write_no_snoop(awbar0, awdomain, awsnoop); endfunction function automatic logic ace_ar_is_shareable(logic arbar0, axdomain_t ardomain, - arsnoop_t arsnoop); - logic retval; - unique case (1'b1) - ace_is_read_once(arbar0, ardomain, arsnoop): retval = 1'b1; - ace_is_read_shared(arbar0, ardomain, arsnoop): retval = 
1'b1; - ace_is_read_clean(arbar0, ardomain, arsnoop): retval = 1'b1; - ace_is_read_not_shared_dirty(arbar0, ardomain, arsnoop): retval = 1'b1; - ace_is_read_unique(arbar0, ardomain, arsnoop): retval = 1'b1; - ace_is_clean_unique(arbar0, ardomain, arsnoop): retval = 1'b1; - ace_is_make_unique(arbar0, ardomain, arsnoop): retval = 1'b1; - default: retval = 1'b0; - endcase - return retval; + arsnoop_t arsnoop); + return ace_is_read_once(arbar0, ardomain, arsnoop) || + ace_is_read_shared(arbar0, ardomain, arsnoop) || + ace_is_read_clean(arbar0, ardomain, arsnoop) || + ace_is_read_not_shared_dirty(arbar0, ardomain, arsnoop) || + ace_is_read_unique(arbar0, ardomain, arsnoop) || + ace_is_clean_unique(arbar0, ardomain, arsnoop) || + ace_is_make_unique(arbar0, ardomain, arsnoop); endfunction - function automatic logic ace_ar_is_clean(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); - logic retval; - unique case (1'b1) - ace_is_clean_unique(arbar0, ardomain, arsnoop): retval = 1'b1; - ace_is_clean_shared(arbar0, ardomain, arsnoop): retval = 1'b1; - ace_is_clean_invalid(arbar0, ardomain, arsnoop): retval = 1'b1; - default: retval = 1'b0; - endcase - return retval; + function automatic logic ace_ar_is_clean(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ace_is_clean_unique(arbar0, ardomain, arsnoop) || + ace_is_clean_shared(arbar0, ardomain, arsnoop) || + ace_is_clean_invalid(arbar0, ardomain, arsnoop); endfunction // Snoop transaction from initiating master transaction function automatic acsnoop_t ace_ar_acsnoop_map(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); - acsnoop_t acsnoop; - unique case (1'b1) - ace_is_clean_unique(arbar0, ardomain, arsnoop): acsnoop = acsnoop_t'(CleanInvalid); - ace_is_make_unique(arbar0, ardomain, arsnoop): acsnoop = acsnoop_t'(MakeInvalid); - default: acsnoop = acsnoop_t'(arsnoop); - endcase - return acsnoop; + return ace_is_clean_unique(arbar0, ardomain, arsnoop) ? 
acsnoop_t'(CleanInvalid) : + ace_is_make_unique(arbar0, ardomain, arsnoop) ? acsnoop_t'(MakeInvalid) : + acsnoop_t'(arsnoop); endfunction function automatic acsnoop_t ace_aw_acsnoop_map(logic awbar0, axdomain_t awdomain, - arsnoop_t awsnoop); - acsnoop_t acsnoop; - unique case (1'b1) - ace_is_write_unique(awbar0, awdomain, awsnoop): acsnoop = acsnoop_t'(CleanInvalid); - ace_is_write_line_unique(awbar0, awdomain, awsnoop): acsnoop = acsnoop_t'(MakeInvalid); - default: acsnoop = acsnoop_t'(CleanInvalid); - endcase - return acsnoop; + awsnoop_t awsnoop); + return ace_is_write_unique(awbar0, awdomain, awsnoop) ? acsnoop_t'(CleanInvalid) : + ace_is_write_line_unique(awbar0, awdomain, awsnoop) ? acsnoop_t'(MakeInvalid) : + acsnoop_t'(CleanInvalid); endfunction function automatic logic ace_ar_accepts_dirty(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); - logic retval; - unique case (1'b1) - ace_is_read_not_shared_dirty(arbar0, ardomain, arsnoop): retval = 1'b1; - ace_is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; - ace_is_read_unique(arbar0, ardomain, arsnoop): retval = 1'b1; - default: retval = 1'b0; - endcase - return retval; + return ace_is_read_not_shared_dirty(arbar0, ardomain, arsnoop) || + ace_is_read_shared(arbar0, ardomain, arsnoop) || + ace_is_read_unique(arbar0, ardomain, arsnoop); endfunction function automatic logic ace_ar_accepts_dirty_shared(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); - logic retval; - unique case (1'b1) - ace_is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; - default: retval = 1'b0; - endcase - return retval; + return ace_is_read_shared(arbar0, ardomain, arsnoop); endfunction function automatic logic ace_ar_accepts_shared(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); - logic retval; - unique case (1'b1) - ace_is_read_not_shared_dirty(arbar0, ardomain, arsnoop): retval = 1'b1; - ace_is_read_shared(arbar0, ardomain, arsnoop): retval = 1'b1; - ace_is_read_clean(arbar0, ardomain, arsnoop): retval 
= 1'b1; - default: retval = 1'b0; - endcase - return retval; + return ace_is_read_not_shared_dirty(arbar0, ardomain, arsnoop) || + ace_is_read_shared(arbar0, ardomain, arsnoop) || + ace_is_read_clean(arbar0, ardomain, arsnoop); + endfunction + + function automatic logic ace_ar_is_exclusive_load(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop, logic arlock); + return (ace_is_read_shared(arbar0, ardomain, arsnoop) || + ace_is_read_clean(arbar0, ardomain, arsnoop)) && arlock; + endfunction + + function automatic logic ace_ar_is_exclusive_store(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop, logic arlock); + return (ace_is_clean_unique(arbar0, ardomain, arsnoop)) && arlock; endfunction endpackage diff --git a/src/ccu/ccu_exclusive_monitor.sv b/src/ccu/ccu_exclusive_monitor.sv index a46fd3f..d5c52a3 100644 --- a/src/ccu/ccu_exclusive_monitor.sv +++ b/src/ccu/ccu_exclusive_monitor.sv @@ -86,23 +86,20 @@ for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_entry assign r_id_hit_o[s] = entry_q[s].id == r_o[s].id; - assign is_exclusive_load = ar_i[s].lock && ( - ace_is_read_clean ( - ar_i[s].bar, + assign is_exclusive_load = + ace_ar_is_exclusive_load ( + ar_i[s].bar[0], ar_i[s].domain, - ar_i[s].snoop - ) || - ace_is_read_shared( - ar_i[s].bar, - ar_i[s].domain, - ar_i[s].snoop - )); + ar_i[s].snoop, + ar_i[s].lock + ); - assign is_exclusive_store = ar_i[s].lock && - ace_is_clean_unique( - ar_i[s].bar, + assign is_exclusive_store = + ace_ar_is_exclusive_store( + ar_i[s].bar[0], ar_i[s].domain, - ar_i[s].snoop + ar_i[s].snoop, + ar_i[s].lock ); assign is_exclusive_sequence = is_exclusive_store || is_exclusive_load; From 4dcf3c2d8ec9ef52896496f7725c1d05bfeca844 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 9 Mar 2026 14:05:50 +0100 Subject: [PATCH 094/109] ccu_snoop_pipeline: fix indentation --- src/ccu/ccu_snoop_pipeline.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu/ccu_snoop_pipeline.sv 
b/src/ccu/ccu_snoop_pipeline.sv index 1e40efb..56bccae 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -98,7 +98,7 @@ module ccu_snoop_pipeline stream_fifo #( .FALL_THROUGH (ccuCfg.u.snoopReqFifoFallthrough), .DEPTH (ccuCfg.u.numSnoopTransactions), - .T (ac_fifo_entry_t) + .T (ac_fifo_entry_t) ) u_ac_fifo ( .clk_i, .rst_ni, From b12572f57da045f6e9c9d9da53e705da891d3dba Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 9 Mar 2026 14:06:11 +0100 Subject: [PATCH 095/109] ccu_snoop_pipeline: fix `ACPROT` propagation --- src/ccu/ccu_snoop_pipeline.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index 56bccae..7f1ad98 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -221,7 +221,7 @@ module ccu_snoop_pipeline assign ac = '{ addr: axi_pkg::aligned_addr(ar_i.addr, ccuCfg.cachelineByteIndexWidth), snoop: ac_snoop, - prot: '0 + prot: ar_i.prot }; assign scoreboard_alloc_check_o = !ar_is_read_no_snoop && ar_valid_i; From 1bc1ddedff1e17c7807381eaa3ad5c88e7cdc10f Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 9 Mar 2026 14:06:37 +0100 Subject: [PATCH 096/109] ccu_snoop_pipeline: fix missing signal declarations --- src/ccu/ccu_snoop_pipeline.sv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index 7f1ad98..13fe444 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -285,6 +285,8 @@ module ccu_snoop_pipeline logic cd_engine_ack_to_read; logic cd_engine_valid; logic cd_engine_ready; + logic cd_engine_resp_shared; + logic cd_engine_resp_dirty; assign stage1_fifo_wdata = '{ ar : ar_i, From 43286683a4cc56513503a91b08446a392731ccc0 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 9 Mar 2026 14:07:04 +0100 Subject: [PATCH 097/109] ccu_top, ccu_frontend: insert parametric subordinate cut --- 
src/ccu/ccu_frontend.sv | 6 +++--- src/ccu/ccu_pkg.sv | 6 ++++++ src/ccu/ccu_top.sv | 30 ++++++++++++++++++++++++++++-- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/ccu/ccu_frontend.sv b/src/ccu/ccu_frontend.sv index 7511fc9..bbaf45f 100644 --- a/src/ccu/ccu_frontend.sv +++ b/src/ccu/ccu_frontend.sv @@ -238,9 +238,9 @@ module ccu_frontend .NoSlvPorts (ccuCfg.u.numSubordinates), .MaxWTrans (ccuCfg.u.numWriteTransactions), .FallThrough (1'b1), - .SpillAw (1'b1), - .SpillW (1'b1), - .SpillB (1'b1), + .SpillAw (1'b0), + .SpillW (1'b0), + .SpillB (1'b0), .SpillAr (1'b0), .SpillR (1'b0) ) u_subordinate_mux ( diff --git a/src/ccu/ccu_pkg.sv b/src/ccu/ccu_pkg.sv index 0258a5c..1b1b0ac 100644 --- a/src/ccu/ccu_pkg.sv +++ b/src/ccu/ccu_pkg.sv @@ -52,6 +52,12 @@ package ccu_pkg; ccu_mmio_intf_e mmioIntf; // Instantiate CCU control and status registers bit enableCSRs; + // Insert spill registers in the frontend + bit frontendPipeAw; + bit frontendPipeW; + bit frontendPipeB; + bit frontendPipeAr; + bit frontendPipeR; } ccu_user_config_t; typedef struct packed { diff --git a/src/ccu/ccu_top.sv b/src/ccu/ccu_top.sv index ffe8c51..68a007d 100644 --- a/src/ccu/ccu_top.sv +++ b/src/ccu/ccu_top.sv @@ -136,9 +136,35 @@ ccu_snoop_pipeline_events_t perf_events; // Frontend // {{{ // The frontend acts as the Point of Serialization (PoS) + ccu_ace_subordinate_req_t [ccuCfg.u.numSubordinates-1:0] subordinate_cut_req; + ccu_ace_subordinate_resp_t [ccuCfg.u.numSubordinates-1:0] subordinate_cut_resp; ccu_ace_req_t frontend_req; ccu_ace_resp_t frontend_resp; + for (genvar s = 0 ; s < ccuCfg.u.numSubordinates; s++) begin : gen_subordinate_cut + axi_cut #( + .BypassAw (!ccuCfg.u.frontendPipeAw), + .BypassW (!ccuCfg.u.frontendPipeW), + .BypassB (!ccuCfg.u.frontendPipeB), + .BypassAr (!ccuCfg.u.frontendPipeAr), + .BypassR (!ccuCfg.u.frontendPipeR), + .aw_chan_t (ccu_ace_subordinate_aw_t), + .w_chan_t (ccu_w_t), + .b_chan_t (ccu_ace_subordinate_b_t), + .ar_chan_t 
(ccu_ace_subordinate_ar_t), + .r_chan_t (ccu_ace_subordinate_r_t), + .axi_req_t (ccu_ace_subordinate_req_t), + .axi_resp_t (ccu_ace_subordinate_resp_t) + ) u_subordinate_cut ( + .clk_i, + .rst_ni, + .slv_req_i (subordinate_req_i[s]), + .slv_resp_o (subordinate_resp_o[s]), + .mst_req_o (subordinate_cut_req[s]), + .mst_resp_i (subordinate_cut_resp[s]) + ); + end + ccu_frontend #( .ccuCfg (ccuCfg), .ccu_ace_manager_ar_t (ccu_ace_ar_t), @@ -157,8 +183,8 @@ ccu_snoop_pipeline_events_t perf_events; ) u_ccu_frontend ( .clk_i, .rst_ni, - .subordinate_req_i (subordinate_req_i), - .subordinate_resp_o (subordinate_resp_o), + .subordinate_req_i (subordinate_cut_req), + .subordinate_resp_o (subordinate_cut_resp), .subordinate_rack_i (subordinate_rack_i), .subordinate_wack_i (subordinate_wack_i), .manager_req_o (frontend_req), From a69e26ea368d009c7878664a86816b0a4f747c4a Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 9 Mar 2026 14:07:43 +0100 Subject: [PATCH 098/109] ccu_snoop_pipeline: fix `MakeUnique` handling --- src/ccu/ccu_snoop_pipeline.sv | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index 13fe444..cfd6bd3 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -348,6 +348,10 @@ module ccu_snoop_pipeline stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop + ) || ace_is_make_unique( + stage1_fifo_rdata.ar.bar[0], + stage1_fifo_rdata.ar.domain, + stage1_fifo_rdata.ar.snoop ); always_comb begin : engine_sel_comb From 29b1e95e2d7df8c55339f51b62695781984d7060 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 9 Mar 2026 15:49:52 +0100 Subject: [PATCH 099/109] ccu_csr_wrap: automate performance counters parameter computation Somewhat hacky. 
--- src/ccu/ccu_csr_wrap.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ccu/ccu_csr_wrap.sv b/src/ccu/ccu_csr_wrap.sv index 136726c..29c7557 100644 --- a/src/ccu/ccu_csr_wrap.sv +++ b/src/ccu/ccu_csr_wrap.sv @@ -30,7 +30,8 @@ module ccu_csr_wrap input logic [numEvents-1:0] events_i ); - localparam int unsigned numPerfCounters = 16; + localparam int unsigned numPerfCounters = + $bits(ccu_csr__perf_countinhibit_r__inh__out_t); ccu_csr__in_t hwif_in; ccu_csr__out_t hwif_out; From ea619d3caaf79628799e6e9979e3dd2f8f3c2182 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 11 Mar 2026 09:47:47 +0100 Subject: [PATCH 100/109] ccu_csr: increase performance counters number and track new events --- src/ccu/ccu_pkg.sv | 29 +++++++++----- src/ccu/ccu_snoop_pipeline.sv | 27 +++++++++++++ src/ccu/regs/ccu_csr.rdl | 2 +- src/ccu/regs/generated/ccu_csr.sv | 55 +++++++++++++-------------- src/ccu/regs/generated/ccu_csr_pkg.sv | 10 ++--- 5 files changed, 80 insertions(+), 43 deletions(-) diff --git a/src/ccu/ccu_pkg.sv b/src/ccu/ccu_pkg.sv index 1b1b0ac..38d746c 100644 --- a/src/ccu/ccu_pkg.sv +++ b/src/ccu/ccu_pkg.sv @@ -110,15 +110,26 @@ package ccu_pkg; // Performance events typedef struct packed { - logic stage0_stall; // 8 - logic stage0_stall_scoreboard_hit; // 7 - logic stage0_stall_ac_fifo_full; // 6 - logic stage0_stall_stage1_fifo_full; // 5 - logic stage1_stall; // 4 - logic stage1_stall_cr_not_valid; // 3 - logic stage1_stall_write_engine_busy; // 2 - logic stage1_stall_read_engine_busy; // 1 - logic stage1_stall_cd_engine_busy; // 0 + logic stage1_read_no_snoop; // 0x13 + logic stage1_read_once; // 0x12 + logic stage1_read_shared; // 0x11 + logic stage1_read_clean; // 0x10 + logic stage1_read_not_shared_dirty; // 0x0F + logic stage1_read_unique; // 0x0E + logic stage1_clean_unique; // 0x0D + logic stage1_make_unique; // 0x0C + logic stage1_clean_shared; // 0x0B + logic stage1_clean_invalid; // 0x0A + logic stage1_make_invalid; // 0x09 
+ logic stage0_stall; // 0x08 + logic stage0_stall_scoreboard_hit; // 0x07 + logic stage0_stall_ac_fifo_full; // 0x06 + logic stage0_stall_stage1_fifo_full; // 0x05 + logic stage1_stall; // 0x04 + logic stage1_stall_cr_not_valid; // 0x03 + logic stage1_stall_write_engine_busy; // 0x02 + logic stage1_stall_read_engine_busy; // 0x01 + logic stage1_stall_cd_engine_busy; // 0x00 } ccu_snoop_pipeline_events_t; endpackage diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index cfd6bd3..886f7ef 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -626,6 +626,33 @@ module ccu_snoop_pipeline ccu_snoop_pipeline_events_t events_d; always_comb begin : perf_events_comb + events_d = '0; + // Transaction occurrence + if (stage1_fifo_valid && stage1_fifo_ready) begin + events_d.stage1_read_no_snoop = + ace_is_read_no_snoop(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop); + events_d.stage1_read_once = + ace_is_read_once(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop); + events_d.stage1_read_shared = + ace_is_read_shared(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop); + events_d.stage1_read_clean = + ace_is_read_clean(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop); + events_d.stage1_read_not_shared_dirty = + ace_is_read_not_shared_dirty(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop); + events_d.stage1_read_unique = + ace_is_read_unique(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop); + events_d.stage1_clean_unique = + ace_is_clean_unique(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop); + events_d.stage1_make_unique = + ace_is_make_unique(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop); + events_d.stage1_clean_shared = + 
ace_is_clean_shared(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop); + events_d.stage1_clean_invalid = + ace_is_clean_invalid(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop); + events_d.stage1_make_invalid = + ace_is_make_invalid(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop); + end + // Stalls events_d.stage0_stall = ar_valid_i && !ar_ready_o; events_d.stage0_stall_scoreboard_hit = scoreboard_alloc_hit_i; events_d.stage0_stall_ac_fifo_full = ac_valid && !ac_ready; diff --git a/src/ccu/regs/ccu_csr.rdl b/src/ccu/regs/ccu_csr.rdl index b85307a..92ecacd 100644 --- a/src/ccu/regs/ccu_csr.rdl +++ b/src/ccu/regs/ccu_csr.rdl @@ -1,5 +1,5 @@ addrmap ccu_csr #( - longint unsigned numPerfCounters = 16 + longint unsigned numPerfCounters = 32 ) { name = "CCU control and status registers"; diff --git a/src/ccu/regs/generated/ccu_csr.sv b/src/ccu/regs/generated/ccu_csr.sv index e96e468..3ba3f21 100644 --- a/src/ccu/regs/generated/ccu_csr.sv +++ b/src/ccu/regs/generated/ccu_csr.sv @@ -9,7 +9,7 @@ module ccu_csr ( input wire s_apb_penable, input wire s_apb_pwrite, input wire [2:0] s_apb_pprot, - input wire [7:0] s_apb_paddr, + input wire [8:0] s_apb_paddr, input wire [31:0] s_apb_pwdata, input wire [3:0] s_apb_pstrb, output logic s_apb_pready, @@ -25,7 +25,7 @@ module ccu_csr ( //-------------------------------------------------------------------------- logic cpuif_req; logic cpuif_req_is_wr; - logic [7:0] cpuif_addr; + logic [8:0] cpuif_addr; logic [31:0] cpuif_wr_data; logic [31:0] cpuif_wr_biten; logic cpuif_req_stall_wr; @@ -54,7 +54,7 @@ module ccu_csr ( is_active <= '1; cpuif_req <= '1; cpuif_req_is_wr <= s_apb_pwrite; - cpuif_addr <= {s_apb_paddr[7:2], 2'b0}; + cpuif_addr <= {s_apb_paddr[8:2], 2'b0}; cpuif_wr_data <= s_apb_pwdata; for(int i=0; i<4; i++) begin cpuif_wr_biten[i*8 +: 8] <= {8{s_apb_pstrb[i]}}; @@ -88,8 +88,8 @@ module ccu_csr ( 
//-------------------------------------------------------------------------- typedef struct { logic perf_countinhibit; - logic perf_eventsel[16]; - logic perf_counter[16]; + logic perf_eventsel[32]; + logic perf_counter[32]; } decoded_reg_strb_t; decoded_reg_strb_t decoded_reg_strb; logic decoded_req; @@ -98,12 +98,12 @@ module ccu_csr ( logic [31:0] decoded_wr_biten; always_comb begin - decoded_reg_strb.perf_countinhibit = cpuif_req_masked & (cpuif_addr == 8'h0); - for(int i0=0; i0<16; i0++) begin - decoded_reg_strb.perf_eventsel[i0] = cpuif_req_masked & (cpuif_addr == 8'h40 + (8)'(i0) * 8'h4); + decoded_reg_strb.perf_countinhibit = cpuif_req_masked & (cpuif_addr == 9'h0); + for(int i0=0; i0<32; i0++) begin + decoded_reg_strb.perf_eventsel[i0] = cpuif_req_masked & (cpuif_addr == 9'h40 + (9)'(i0) * 9'h4); end - for(int i0=0; i0<16; i0++) begin - decoded_reg_strb.perf_counter[i0] = cpuif_req_masked & (cpuif_addr == 8'hc0 + (8)'(i0) * 8'h4); + for(int i0=0; i0<32; i0++) begin + decoded_reg_strb.perf_counter[i0] = cpuif_req_masked & (cpuif_addr == 9'hc0 + (9)'(i0) * 9'h4); end end @@ -119,7 +119,7 @@ module ccu_csr ( typedef struct { struct { struct { - logic [15:0] next; + logic [31:0] next; logic load_next; } inh; } perf_countinhibit; @@ -128,7 +128,7 @@ module ccu_csr ( logic [7:0] next; logic load_next; } event_id; - } perf_eventsel[16]; + } perf_eventsel[32]; struct { struct { logic [31:0] next; @@ -136,37 +136,37 @@ module ccu_csr ( logic incrthreshold; logic overflow; } val; - } perf_counter[16]; + } perf_counter[32]; } field_combo_t; field_combo_t field_combo; typedef struct { struct { struct { - logic [15:0] value; + logic [31:0] value; } inh; } perf_countinhibit; struct { struct { logic [7:0] value; } event_id; - } perf_eventsel[16]; + } perf_eventsel[32]; struct { struct { logic [31:0] value; } val; - } perf_counter[16]; + } perf_counter[32]; } field_storage_t; field_storage_t field_storage; // Field: ccu_csr.perf_countinhibit.inh always_comb begin - 
automatic logic [15:0] next_c; + automatic logic [31:0] next_c; automatic logic load_next_c; next_c = field_storage.perf_countinhibit.inh.value; load_next_c = '0; if(decoded_reg_strb.perf_countinhibit && decoded_req_is_wr) begin // SW write - next_c = (field_storage.perf_countinhibit.inh.value & ~decoded_wr_biten[15:0]) | (decoded_wr_data[15:0] & decoded_wr_biten[15:0]); + next_c = (field_storage.perf_countinhibit.inh.value & ~decoded_wr_biten[31:0]) | (decoded_wr_data[31:0] & decoded_wr_biten[31:0]); load_next_c = '1; end field_combo.perf_countinhibit.inh.next = next_c; @@ -174,7 +174,7 @@ module ccu_csr ( end always_ff @(posedge clk or negedge arst_n) begin if(~arst_n) begin - field_storage.perf_countinhibit.inh.value <= 16'hffff; + field_storage.perf_countinhibit.inh.value <= 32'hffffffff; end else begin if(field_combo.perf_countinhibit.inh.load_next) begin field_storage.perf_countinhibit.inh.value <= field_combo.perf_countinhibit.inh.next; @@ -182,7 +182,7 @@ module ccu_csr ( end end assign hwif_out.perf_countinhibit.inh.value = field_storage.perf_countinhibit.inh.value; - for(genvar i0=0; i0<16; i0++) begin + for(genvar i0=0; i0<32; i0++) begin // Field: ccu_csr.perf_eventsel[].event_id always_comb begin automatic logic [7:0] next_c; @@ -207,7 +207,7 @@ module ccu_csr ( end assign hwif_out.perf_eventsel[i0].event_id.value = field_storage.perf_eventsel[i0].event_id.value; end - for(genvar i0=0; i0<16; i0++) begin + for(genvar i0=0; i0<32; i0++) begin // Field: ccu_csr.perf_counter[].val always_comb begin automatic logic [31:0] next_c; @@ -257,15 +257,14 @@ module ccu_csr ( logic [31:0] readback_data; // Assign readback values to a flattened array - logic [31:0] readback_array[33]; - assign readback_array[0][15:0] = (decoded_reg_strb.perf_countinhibit && !decoded_req_is_wr) ? 
field_storage.perf_countinhibit.inh.value : '0; - assign readback_array[0][31:16] = '0; - for(genvar i0=0; i0<16; i0++) begin + logic [31:0] readback_array[65]; + assign readback_array[0][31:0] = (decoded_reg_strb.perf_countinhibit && !decoded_req_is_wr) ? field_storage.perf_countinhibit.inh.value : '0; + for(genvar i0=0; i0<32; i0++) begin assign readback_array[i0 * 1 + 1][7:0] = (decoded_reg_strb.perf_eventsel[i0] && !decoded_req_is_wr) ? field_storage.perf_eventsel[i0].event_id.value : '0; assign readback_array[i0 * 1 + 1][31:8] = '0; end - for(genvar i0=0; i0<16; i0++) begin - assign readback_array[i0 * 1 + 17][31:0] = (decoded_reg_strb.perf_counter[i0] && !decoded_req_is_wr) ? field_storage.perf_counter[i0].val.value : '0; + for(genvar i0=0; i0<32; i0++) begin + assign readback_array[i0 * 1 + 33][31:0] = (decoded_reg_strb.perf_counter[i0] && !decoded_req_is_wr) ? field_storage.perf_counter[i0].val.value : '0; end // Reduce the array @@ -274,7 +273,7 @@ module ccu_csr ( readback_done = decoded_req & ~decoded_req_is_wr; readback_err = '0; readback_data_var = '0; - for(int i=0; i<33; i++) readback_data_var |= readback_array[i]; + for(int i=0; i<65; i++) readback_data_var |= readback_array[i]; readback_data = readback_data_var; end diff --git a/src/ccu/regs/generated/ccu_csr_pkg.sv b/src/ccu/regs/generated/ccu_csr_pkg.sv index c0967b0..0cdd97e 100644 --- a/src/ccu/regs/generated/ccu_csr_pkg.sv +++ b/src/ccu/regs/generated/ccu_csr_pkg.sv @@ -4,7 +4,7 @@ package ccu_csr_pkg; localparam CCU_CSR_DATA_WIDTH = 32; - localparam CCU_CSR_MIN_ADDR_WIDTH = 8; + localparam CCU_CSR_MIN_ADDR_WIDTH = 9; typedef struct { logic incr; @@ -15,11 +15,11 @@ package ccu_csr_pkg; } ccu_csr__perf_counter_r__in_t; typedef struct { - ccu_csr__perf_counter_r__in_t perf_counter[16]; + ccu_csr__perf_counter_r__in_t perf_counter[32]; } ccu_csr__in_t; typedef struct { - logic [15:0] value; + logic [31:0] value; } ccu_csr__perf_countinhibit_r__inh__out_t; typedef struct { @@ -44,7 +44,7 @@ 
package ccu_csr_pkg; typedef struct { ccu_csr__perf_countinhibit_r__out_t perf_countinhibit; - ccu_csr__perf_eventsel_r__out_t perf_eventsel[16]; - ccu_csr__perf_counter_r__out_t perf_counter[16]; + ccu_csr__perf_eventsel_r__out_t perf_eventsel[32]; + ccu_csr__perf_counter_r__out_t perf_counter[32]; } ccu_csr__out_t; endpackage From 7d917788f59c2b20ec3b74417a6ddb68afb19e01 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 11 Mar 2026 09:48:22 +0100 Subject: [PATCH 101/109] ccu_scoreboard: fix unwanted allocation when scoreboard is full --- src/ccu/ccu_scoreboard.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu/ccu_scoreboard.sv b/src/ccu/ccu_scoreboard.sv index 413abe3..a5125dd 100644 --- a/src/ccu/ccu_scoreboard.sv +++ b/src/ccu/ccu_scoreboard.sv @@ -98,7 +98,7 @@ module ccu_scoreboard logic [ccuCfg.subordinateIndexWidth-1:0] subordinate_index; logic alloc; assign subordinate_index = entry_q[e].id[ccuCfg.axiCcuIdWidth-1-:ccuCfg.subordinateIndexWidth]; - assign alloc = alloc_i && alloc_entry_o == e; + assign alloc = alloc_i && alloc_entry_o == e && !full_o; assign dealloc_o[e] = dealloc_i[subordinate_index] && dealloc_entry_i[subordinate_index] == e; assign address_hit[e] = alloc_addr_slice == entry_q[e].addr; assign dealloc_id_hit[e] = dealloc_id_i == entry_q[e].id; From 61e0c0f3980da11698e0d48e7d3a44fc4579e53e Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 11 Mar 2026 13:56:12 +0100 Subject: [PATCH 102/109] ccu_snoop_pipeline: fix stage0 stalling condition --- src/ccu/ccu_snoop_pipeline.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index 886f7ef..eade9f2 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -252,7 +252,7 @@ module ccu_snoop_pipeline .valid_i (ar_fork_valid), .ready_o (ar_fork_ready), .sel_i ({!ar_is_read_no_snoop, 1'b1}), - .sel_valid_i (!ar_is_read_no_snoop || !scoreboard_full_i), + 
.sel_valid_i (ar_is_read_no_snoop || !scoreboard_full_i), .sel_ready_o (), .valid_o ({ac_valid, stage0_valid}), .ready_i ({ac_ready, stage0_ready}) From 3a35e89308a61656772a592c7398e1fa19aef3ab Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 11 Mar 2026 13:56:38 +0100 Subject: [PATCH 103/109] ccu_snoop_pipeline: add new performance events --- src/ccu/ccu_pkg.sv | 27 ++++++++++++++------------- src/ccu/ccu_snoop_pipeline.sv | 26 ++++++++++++++++---------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/ccu/ccu_pkg.sv b/src/ccu/ccu_pkg.sv index 38d746c..c9b71fd 100644 --- a/src/ccu/ccu_pkg.sv +++ b/src/ccu/ccu_pkg.sv @@ -110,19 +110,20 @@ package ccu_pkg; // Performance events typedef struct packed { - logic stage1_read_no_snoop; // 0x13 - logic stage1_read_once; // 0x12 - logic stage1_read_shared; // 0x11 - logic stage1_read_clean; // 0x10 - logic stage1_read_not_shared_dirty; // 0x0F - logic stage1_read_unique; // 0x0E - logic stage1_clean_unique; // 0x0D - logic stage1_make_unique; // 0x0C - logic stage1_clean_shared; // 0x0B - logic stage1_clean_invalid; // 0x0A - logic stage1_make_invalid; // 0x09 - logic stage0_stall; // 0x08 - logic stage0_stall_scoreboard_hit; // 0x07 + logic stage1_read_no_snoop; // 0x14 + logic stage1_read_once; // 0x13 + logic stage1_read_shared; // 0x12 + logic stage1_read_clean; // 0x11 + logic stage1_read_not_shared_dirty; // 0x10 + logic stage1_read_unique; // 0x0F + logic stage1_clean_unique; // 0x0E + logic stage1_make_unique; // 0x0D + logic stage1_clean_shared; // 0x0C + logic stage1_clean_invalid; // 0x0B + logic stage1_make_invalid; // 0x0A + logic stage0_stall; // 0x09 + logic stage0_stall_scoreboard_hit; // 0x08 + logic stage0_stall_scoreboard_full; // 0x07 logic stage0_stall_ac_fifo_full; // 0x06 logic stage0_stall_stage1_fifo_full; // 0x05 logic stage1_stall; // 0x04 diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index eade9f2..9bc9f6e 100644 --- 
a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -652,16 +652,22 @@ module ccu_snoop_pipeline events_d.stage1_make_invalid = ace_is_make_invalid(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop); end - // Stalls - events_d.stage0_stall = ar_valid_i && !ar_ready_o; - events_d.stage0_stall_scoreboard_hit = scoreboard_alloc_hit_i; - events_d.stage0_stall_ac_fifo_full = ac_valid && !ac_ready; - events_d.stage0_stall_stage1_fifo_full = stage0_valid && !stage0_ready; - events_d.stage1_stall = stage1_fifo_valid && !stage1_fifo_ready; - events_d.stage1_stall_cr_not_valid = stage1_fifo_valid && |(~cr_fifo_valid & stage1_fifo_rdata.sel); - events_d.stage1_stall_write_engine_busy = write_engine_aw_valid_o && !write_engine_aw_ready_i; - events_d.stage1_stall_read_engine_busy = read_engine_ar_valid_o && !read_engine_ar_ready_i; - events_d.stage1_stall_cd_engine_busy = cd_engine_valid && !cd_engine_ready; + // Stage 0 stalls + if (ar_valid_i && !ar_ready_o) begin + events_d.stage0_stall = 1'b1; + events_d.stage0_stall_scoreboard_hit = scoreboard_alloc_hit_i; + events_d.stage0_stall_scoreboard_full = scoreboard_full_i; + events_d.stage0_stall_ac_fifo_full = ac_valid && !ac_ready; + events_d.stage0_stall_stage1_fifo_full = stage0_valid && !stage0_ready; + end + // Stage 1 stalls + if (stage1_fifo_valid && !stage1_fifo_ready) begin + events_d.stage1_stall = 1'b1; + events_d.stage1_stall_cr_not_valid = stage1_fifo_valid && |(~cr_fifo_valid & stage1_fifo_rdata.sel); + events_d.stage1_stall_write_engine_busy = write_engine_aw_valid_o && !write_engine_aw_ready_i; + events_d.stage1_stall_read_engine_busy = read_engine_ar_valid_o && !read_engine_ar_ready_i; + events_d.stage1_stall_cd_engine_busy = cd_engine_valid && !cd_engine_ready; + end end always_ff @(posedge clk_i or negedge rst_ni) begin From 785d0fab4488b6708299850423fe5577d7aa7718 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 23 Mar 2026 11:14:08 +0100 
Subject: [PATCH 104/109] ccu: fix `make*` handling --- src/ace_pkg.sv | 6 ++++++ src/ccu/ccu_snoop_pipeline.sv | 12 ++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/ace_pkg.sv b/src/ace_pkg.sv index 508e099..6dd8cb0 100644 --- a/src/ace_pkg.sv +++ b/src/ace_pkg.sv @@ -305,6 +305,12 @@ package ace_pkg; ace_is_clean_invalid(arbar0, ardomain, arsnoop); endfunction + function automatic logic ace_ar_is_make(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ace_is_make_invalid(arbar0, ardomain, arsnoop) || + ace_is_make_unique(arbar0, ardomain, arsnoop); + endfunction + // Snoop transaction from initiating master transaction function automatic acsnoop_t ace_ar_acsnoop_map(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index 9bc9f6e..d094f33 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -272,7 +272,7 @@ module ccu_snoop_pipeline stage1_fifo_entry_t stage1_fifo_rdata; logic accepts_dirty; logic accepts_shared; - logic is_clean; + logic is_clean_or_make; ccu_snoop_cr_t cr; logic [ccuCfg.u.numSubordinates-1:0] cd_data_transfer; logic engine_fork_valid; @@ -344,11 +344,11 @@ module ccu_snoop_pipeline stage1_fifo_rdata.ar.snoop ); - assign is_clean = ace_ar_is_clean( + assign is_clean_or_make = ace_ar_is_clean( stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop - ) || ace_is_make_unique( + ) || ace_ar_is_make( stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop @@ -362,7 +362,7 @@ module ccu_snoop_pipeline cd_engine_forward_to_write = 1'b0; cd_engine_ack_to_read = 1'b0; - case ({cr.resp.DataTransfer, is_clean}) + case ({cr.resp.DataTransfer, is_clean_or_make}) // Forward the request to memory 2'b00: read_engine_sel = 1'b1; // Send only the clean R response @@ -374,8 +374,8 @@ module ccu_snoop_pipeline // is providing data default: begin 
cd_engine_sel = 1'b1; - cd_engine_forward_to_read = !is_clean; - cd_engine_ack_to_read = is_clean; + cd_engine_forward_to_read = !is_clean_or_make; + cd_engine_ack_to_read = is_clean_or_make; if (cr.resp.PassDirty && !accepts_dirty) begin // The initiator cannot accept dirty data, // thus we need a writeback From 3934d97b15883ebb2f2b07ef6a7ca7f4bcdf3bc0 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Mon, 23 Mar 2026 11:15:12 +0100 Subject: [PATCH 105/109] ccu: add snoop miss and hit events --- src/ccu/ccu_pkg.sv | 6 ++++-- src/ccu/ccu_snoop_pipeline.sv | 29 +++++++++++++++++++++++------ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/ccu/ccu_pkg.sv b/src/ccu/ccu_pkg.sv index c9b71fd..3420105 100644 --- a/src/ccu/ccu_pkg.sv +++ b/src/ccu/ccu_pkg.sv @@ -110,6 +110,8 @@ package ccu_pkg; // Performance events typedef struct packed { + logic snoop_hit; // 0x16 + logic snoop_miss; // 0x15 logic stage1_read_no_snoop; // 0x14 logic stage1_read_once; // 0x13 logic stage1_read_shared; // 0x12 @@ -121,12 +123,12 @@ package ccu_pkg; logic stage1_clean_shared; // 0x0C logic stage1_clean_invalid; // 0x0B logic stage1_make_invalid; // 0x0A - logic stage0_stall; // 0x09 + logic stage0_stall_other; // 0x09 logic stage0_stall_scoreboard_hit; // 0x08 logic stage0_stall_scoreboard_full; // 0x07 logic stage0_stall_ac_fifo_full; // 0x06 logic stage0_stall_stage1_fifo_full; // 0x05 - logic stage1_stall; // 0x04 + logic stage1_stall_other; // 0x04 logic stage1_stall_cr_not_valid; // 0x03 logic stage1_stall_write_engine_busy; // 0x02 logic stage1_stall_read_engine_busy; // 0x01 diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv index d094f33..cfc9167 100644 --- a/src/ccu/ccu_snoop_pipeline.sv +++ b/src/ccu/ccu_snoop_pipeline.sv @@ -654,19 +654,36 @@ module ccu_snoop_pipeline end // Stage 0 stalls if (ar_valid_i && !ar_ready_o) begin - events_d.stage0_stall = 1'b1; - events_d.stage0_stall_scoreboard_hit = scoreboard_alloc_hit_i; - 
events_d.stage0_stall_scoreboard_full = scoreboard_full_i; - events_d.stage0_stall_ac_fifo_full = ac_valid && !ac_ready; - events_d.stage0_stall_stage1_fifo_full = stage0_valid && !stage0_ready; + events_d.stage0_stall_scoreboard_hit = scoreboard_alloc_hit_i; + events_d.stage0_stall_scoreboard_full = scoreboard_full_i; + events_d.stage0_stall_ac_fifo_full = ac_valid && !ac_ready; + events_d.stage0_stall_stage1_fifo_full = stage0_valid && !stage0_ready; + // Catch all event + events_d.stage0_stall_other = ~|{ + events_d.stage0_stall_scoreboard_hit, + events_d.stage0_stall_scoreboard_full, + events_d.stage0_stall_ac_fifo_full, + events_d.stage0_stall_stage1_fifo_full + }; end // Stage 1 stalls if (stage1_fifo_valid && !stage1_fifo_ready) begin - events_d.stage1_stall = 1'b1; events_d.stage1_stall_cr_not_valid = stage1_fifo_valid && |(~cr_fifo_valid & stage1_fifo_rdata.sel); events_d.stage1_stall_write_engine_busy = write_engine_aw_valid_o && !write_engine_aw_ready_i; events_d.stage1_stall_read_engine_busy = read_engine_ar_valid_o && !read_engine_ar_ready_i; events_d.stage1_stall_cd_engine_busy = cd_engine_valid && !cd_engine_ready; + // Catch all event + events_d.stage1_stall_other = ~|{ + events_d.stage1_stall_cr_not_valid, + events_d.stage1_stall_write_engine_busy, + events_d.stage1_stall_read_engine_busy, + events_d.stage1_stall_cd_engine_busy + }; + end + + if (stage1_fifo_valid && stage1_fifo_ready && |stage1_fifo_rdata.sel && !is_clean_or_make) begin + events_d.snoop_hit = cr.resp.DataTransfer; + events_d.snoop_miss = !cr.resp.DataTransfer; end end From 96b9dd7b41c853c09f23db63eea19c2b29eb676e Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Wed, 25 Mar 2026 17:11:05 +0100 Subject: [PATCH 106/109] frontend: use custom arbiter --- Bender.yml | 1 + src/ccu/ccu_frontend.sv | 56 +++---- src/ccu/ccu_frontend_arbiter.sv | 279 ++++++++++++++++++++++++++++++++ 3 files changed, 306 insertions(+), 30 deletions(-) create mode 100644 src/ccu/ccu_frontend_arbiter.sv
diff --git a/Bender.yml b/Bender.yml index ae5e0a6..077c2ec 100644 --- a/Bender.yml +++ b/Bender.yml @@ -27,6 +27,7 @@ sources: # CCU source files - src/ccu/ccu_csr_wrap.sv - src/ccu/ccu_exclusive_monitor.sv + - src/ccu/ccu_frontend_arbiter.sv - src/ccu/ccu_frontend.sv - src/ccu/ccu_read_engine.sv - src/ccu/ccu_replay.sv diff --git a/src/ccu/ccu_frontend.sv b/src/ccu/ccu_frontend.sv index bbaf45f..5a99972 100644 --- a/src/ccu/ccu_frontend.sv +++ b/src/ccu/ccu_frontend.sv @@ -220,38 +220,34 @@ module ccu_frontend // Point of Serialization (PoS) // {{{ - axi_mux #( - .SlvAxiIDWidth (ccuCfg.u.axiSubordinateIdWidth), - .slv_aw_chan_t (ccu_ace_subordinate_aw_t), - .mst_aw_chan_t (ccu_ace_manager_aw_t), - .w_chan_t (ccu_w_t), - .slv_b_chan_t (ccu_ace_subordinate_b_t), - .mst_b_chan_t (ccu_ace_manager_b_t), - .slv_ar_chan_t (ccu_ace_subordinate_ar_t), - .mst_ar_chan_t (ccu_ace_manager_ar_t), - .slv_r_chan_t (ccu_ace_subordinate_r_t), - .mst_r_chan_t (ccu_ace_manager_r_t), - .slv_req_t (ccu_ace_subordinate_req_t), - .slv_resp_t (ccu_ace_subordinate_resp_t), - .mst_req_t (ccu_ace_manager_req_t), - .mst_resp_t (ccu_ace_manager_resp_t), - .NoSlvPorts (ccuCfg.u.numSubordinates), - .MaxWTrans (ccuCfg.u.numWriteTransactions), - .FallThrough (1'b1), - .SpillAw (1'b0), - .SpillW (1'b0), - .SpillB (1'b0), - .SpillAr (1'b0), - .SpillR (1'b0) - ) u_subordinate_mux ( + ccu_frontend_arbiter #( + .numSubordinates (ccuCfg.u.numSubordinates), + .aceSubordinateIdWidth(ccuCfg.u.axiSubordinateIdWidth), + .maxWTrans (ccuCfg.u.numWriteTransactions), + .fallThrough (1'b1), + + .ccu_ace_manager_ar_t (ccu_ace_manager_ar_t), + .ccu_ace_manager_aw_t (ccu_ace_manager_aw_t), + .ccu_w_t (ccu_w_t), + .ccu_ace_manager_r_t (ccu_ace_manager_r_t), + .ccu_ace_manager_b_t (ccu_ace_manager_b_t), + .ccu_ace_manager_req_t (ccu_ace_manager_req_t), + .ccu_ace_manager_resp_t(ccu_ace_manager_resp_t), + + .ccu_ace_subordinate_ar_t (ccu_ace_subordinate_ar_t), + .ccu_ace_subordinate_aw_t (ccu_ace_subordinate_aw_t), + 
.ccu_ace_subordinate_r_t (ccu_ace_subordinate_r_t), + .ccu_ace_subordinate_b_t (ccu_ace_subordinate_b_t), + .ccu_ace_subordinate_req_t (ccu_ace_subordinate_req_t), + .ccu_ace_subordinate_resp_t(ccu_ace_subordinate_resp_t) + ) u_subordinate_arbiter ( .clk_i, .rst_ni, - .test_i (1'b0), - .slv_reqs_i (subordinate_req), - .slv_resps_o (subordinate_resp), - .mst_req_o (manager_req_o), - .mst_resp_i (manager_resp_i) - ); + .subordinate_req_i (subordinate_req), + .subordinate_resp_o (subordinate_resp), + .manager_req_o (manager_req_o), + .manager_resp_i (manager_resp_i) + ); // }}} // Scoreboard dealloc check diff --git a/src/ccu/ccu_frontend_arbiter.sv b/src/ccu/ccu_frontend_arbiter.sv new file mode 100644 index 0000000..de62f28 --- /dev/null +++ b/src/ccu/ccu_frontend_arbiter.sv @@ -0,0 +1,279 @@ +// Copyright (c) 2026 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Authors: +// - Riccardo Tedeschi + +module ccu_frontend_arbiter + import ace_pkg::*; + import ccu_pkg::*; +#( + parameter int unsigned numSubordinates = 0, + parameter int unsigned aceSubordinateIdWidth = 0, + parameter int unsigned maxWTrans = 0, + parameter int unsigned fallThrough = 0, + + parameter type ccu_ace_manager_ar_t = logic, + parameter type ccu_ace_manager_aw_t = logic, + parameter type ccu_w_t = logic, + parameter type ccu_ace_manager_r_t = logic, + parameter type ccu_ace_manager_b_t = logic, + parameter type ccu_ace_manager_req_t = logic, + parameter type ccu_ace_manager_resp_t = logic, + + parameter type ccu_ace_subordinate_ar_t = logic, + parameter type ccu_ace_subordinate_aw_t = logic, + parameter type ccu_ace_subordinate_r_t = logic, + parameter type ccu_ace_subordinate_b_t = logic, + parameter type ccu_ace_subordinate_req_t = logic, + parameter type ccu_ace_subordinate_resp_t = logic + +) ( + input logic clk_i, + input logic rst_ni, + + input ccu_ace_subordinate_req_t [numSubordinates-1:0] subordinate_req_i, + output ccu_ace_subordinate_resp_t [numSubordinates-1:0] subordinate_resp_o, + output ccu_ace_manager_req_t manager_req_o, + input ccu_ace_manager_resp_t manager_resp_i +); + +localparam int unsigned subordinateIndexWidth = numSubordinates > 1 ? 
$clog2(numSubordinates) : 1; +localparam int unsigned aceManagerIdWidth = subordinateIndexWidth + aceSubordinateIdWidth; + +ccu_ace_manager_aw_t [numSubordinates-1:0] subordinate_aw; +logic [numSubordinates-1:0] subordinate_aw_valid; +logic [numSubordinates-1:0] subordinate_aw_ready; +ccu_w_t [numSubordinates-1:0] subordinate_w; +logic [numSubordinates-1:0] subordinate_w_valid; +logic [numSubordinates-1:0] subordinate_w_ready; +ccu_ace_manager_b_t [numSubordinates-1:0] subordinate_b; +logic [numSubordinates-1:0] subordinate_b_valid; +logic [numSubordinates-1:0] subordinate_b_ready; +ccu_ace_manager_ar_t [numSubordinates-1:0] subordinate_ar; +logic [numSubordinates-1:0] subordinate_ar_valid; +logic [numSubordinates-1:0] subordinate_ar_ready; +ccu_ace_manager_r_t [numSubordinates-1:0] subordinate_r; +logic [numSubordinates-1:0] subordinate_r_valid; +logic [numSubordinates-1:0] subordinate_r_ready; + +logic aw_arbiter_valid; +logic aw_arbiter_ready; + +for (genvar s = 0; s < numSubordinates; s++) begin : gen_id_prepend + + axi_id_prepend #( + .NoBus (32'd1), + .AxiIdWidthSlvPort (aceSubordinateIdWidth), + .AxiIdWidthMstPort (aceManagerIdWidth), + .slv_aw_chan_t (ccu_ace_subordinate_aw_t), + .slv_w_chan_t (ccu_w_t), + .slv_b_chan_t (ccu_ace_subordinate_b_t), + .slv_ar_chan_t (ccu_ace_subordinate_ar_t), + .slv_r_chan_t (ccu_ace_subordinate_r_t), + .mst_aw_chan_t (ccu_ace_manager_aw_t), + .mst_w_chan_t (ccu_w_t), + .mst_b_chan_t (ccu_ace_manager_b_t), + .mst_ar_chan_t (ccu_ace_manager_ar_t), + .mst_r_chan_t (ccu_ace_manager_r_t) + ) u_id_prepend ( + .pre_id_i (subordinateIndexWidth'(s)), + .slv_aw_chans_i (subordinate_req_i[s].aw), + .slv_aw_valids_i (subordinate_req_i[s].aw_valid), + .slv_aw_readies_o (subordinate_resp_o[s].aw_ready), + .slv_w_chans_i (subordinate_req_i[s].w), + .slv_w_valids_i (subordinate_req_i[s].w_valid), + .slv_w_readies_o (subordinate_resp_o[s].w_ready), + .slv_b_chans_o (subordinate_resp_o[s].b), + .slv_b_valids_o (subordinate_resp_o[s].b_valid), 
+ .slv_b_readies_i (subordinate_req_i[s].b_ready), + .slv_ar_chans_i (subordinate_req_i[s].ar), + .slv_ar_valids_i (subordinate_req_i[s].ar_valid), + .slv_ar_readies_o (subordinate_resp_o[s].ar_ready), + .slv_r_chans_o (subordinate_resp_o[s].r), + .slv_r_valids_o (subordinate_resp_o[s].r_valid), + .slv_r_readies_i (subordinate_req_i[s].r_ready), + .mst_aw_chans_o (subordinate_aw[s]), + .mst_aw_valids_o (subordinate_aw_valid[s]), + .mst_aw_readies_i (subordinate_aw_ready[s]), + .mst_w_chans_o (subordinate_w[s]), + .mst_w_valids_o (subordinate_w_valid[s]), + .mst_w_readies_i (subordinate_w_ready[s]), + .mst_b_chans_i (subordinate_b[s]), + .mst_b_valids_i (subordinate_b_valid[s]), + .mst_b_readies_o (subordinate_b_ready[s]), + .mst_ar_chans_o (subordinate_ar[s]), + .mst_ar_valids_o (subordinate_ar_valid[s]), + .mst_ar_readies_i (subordinate_ar_ready[s]), + .mst_r_chans_i (subordinate_r[s]), + .mst_r_valids_i (subordinate_r_valid[s]), + .mst_r_readies_o (subordinate_r_ready[s]) + ); +end + +// AW +// {{{ + logic w_ctrl_fifo_valid_in; + logic w_ctrl_fifo_ready_in; + logic [subordinateIndexWidth-1:0] w_ctrl_fifo_wdata; + logic aw_is_evict; + + rr_arb_tree #( + .NumIn (numSubordinates), + .DataType (ccu_ace_manager_aw_t), + .ExtPrio (1'b0), + .AxiVldRdy (1'b1), + .LockIn (1'b1), + .FairArb (1'b1) + ) u_aw_arbiter ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .rr_i ('0), + .req_i (subordinate_aw_valid), + .gnt_o (subordinate_aw_ready), + .data_i (subordinate_aw), + .req_o (aw_arbiter_valid), + .gnt_i (aw_arbiter_ready), + .data_o (manager_req_o.aw), + .idx_o (w_ctrl_fifo_wdata) + ); + + assign aw_is_evict = ace_is_evict(manager_req_o.aw.bar[0], manager_req_o.aw.domain, manager_req_o.aw.snoop); + + stream_fork_dynamic #( + .N_OUP(2) + ) u_aw_fork ( + .clk_i, + .rst_ni, + .valid_i (aw_arbiter_valid), + .ready_o (aw_arbiter_ready), + .sel_i ({!aw_is_evict, 1'b1}), + .sel_valid_i (1'b1), + .sel_ready_o (), + .valid_o ({w_ctrl_fifo_valid_in, manager_req_o.aw_valid}), + .ready_i 
({w_ctrl_fifo_ready_in, manager_resp_i.aw_ready}) + ); +// }}} + +// W +// {{{ + logic w_ctrl_fifo_valid_out; + logic w_ctrl_fifo_ready_out; + logic [subordinateIndexWidth-1:0] w_ctrl_fifo_rdata; + logic w_mux_valid_out; + logic w_mux_ready_out; + + stream_fifo #( + .FALL_THROUGH(fallThrough), + .DATA_WIDTH (subordinateIndexWidth), + .DEPTH (maxWTrans) + ) u_w_ctrl_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .usage_o (), + .data_i (w_ctrl_fifo_wdata), + .valid_i (w_ctrl_fifo_valid_in), + .ready_o (w_ctrl_fifo_ready_in), + .data_o (w_ctrl_fifo_rdata), + .valid_o (w_ctrl_fifo_valid_out), + .ready_i (w_ctrl_fifo_ready_out && manager_req_o.w.last) + ); + + stream_mux #( + .DATA_T(ccu_w_t), + .N_INP (numSubordinates) + ) u_w_mux ( + .inp_data_i (subordinate_w), + .inp_valid_i(subordinate_w_valid), + .inp_ready_o(subordinate_w_ready), + .inp_sel_i (w_ctrl_fifo_rdata), + .oup_data_o (manager_req_o.w), + .oup_valid_o(w_mux_valid_out), + .oup_ready_i(w_mux_ready_out) + ); + + stream_join #( + .N_INP(2) + ) u_w_join ( + .inp_valid_i({w_ctrl_fifo_valid_out, w_mux_valid_out}), + .inp_ready_o({w_ctrl_fifo_ready_out, w_mux_ready_out}), + .oup_valid_o(manager_req_o.w_valid), + .oup_ready_i(manager_resp_i.w_ready) + ); +// }}} + +// B +// {{{ +logic [subordinateIndexWidth-1:0] b_demux_sel; + +assign b_demux_sel = manager_resp_i.b.id[aceManagerIdWidth-1:aceSubordinateIdWidth]; + +stream_demux #( + .N_OUP (numSubordinates) +) u_b_demux ( + .inp_valid_i (manager_resp_i.b_valid), + .inp_ready_o (manager_req_o.b_ready), + .oup_sel_i (b_demux_sel), + .oup_valid_o (subordinate_b_valid), + .oup_ready_i (subordinate_b_ready) +); + +assign subordinate_b = {numSubordinates{manager_resp_i.b}}; +// }}} + +// AR +// {{{ + rr_arb_tree #( + .NumIn (numSubordinates), + .DataType (ccu_ace_manager_ar_t), + .ExtPrio (1'b0), + .AxiVldRdy (1'b1), + .LockIn (1'b1), + .FairArb (1'b1) + ) u_ar_arbiter ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .rr_i ('0), + 
.req_i (subordinate_ar_valid), + .gnt_o (subordinate_ar_ready), + .data_i (subordinate_ar), + .req_o (manager_req_o.ar_valid), + .gnt_i (manager_resp_i.ar_ready), + .data_o (manager_req_o.ar), + .idx_o () + ); +// }}} + +// R +// {{{ +logic [subordinateIndexWidth-1:0] r_demux_sel; + +assign r_demux_sel = manager_resp_i.r.id[aceManagerIdWidth-1:aceSubordinateIdWidth]; + +stream_demux #( + .N_OUP (numSubordinates) +) u_r_demux ( + .inp_valid_i (manager_resp_i.r_valid), + .inp_ready_o (manager_req_o.r_ready), + .oup_sel_i (r_demux_sel), + .oup_valid_o (subordinate_r_valid), + .oup_ready_i (subordinate_r_ready) +); + +assign subordinate_r = {numSubordinates{manager_resp_i.r}}; +// }}} + + +endmodule From a2f27b5fc38ff48f5b77f568745eb562085ba6c4 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Thu, 26 Mar 2026 14:28:52 +0100 Subject: [PATCH 107/109] frontend: balance pipelining --- src/ccu/ccu_exclusive_monitor.sv | 353 +++++++++++++++---------------- src/ccu/ccu_frontend.sv | 285 ++++++++++++++++--------- src/ccu/ccu_top.sv | 30 +-- 3 files changed, 360 insertions(+), 308 deletions(-) diff --git a/src/ccu/ccu_exclusive_monitor.sv b/src/ccu/ccu_exclusive_monitor.sv index d5c52a3..e79c8a4 100644 --- a/src/ccu/ccu_exclusive_monitor.sv +++ b/src/ccu/ccu_exclusive_monitor.sv @@ -12,7 +12,6 @@ // Authors: // - Riccardo Tedeschi -`include "axi/assign.svh" `include "ace/assign.svh" module ccu_exclusive_monitor @@ -21,30 +20,33 @@ module ccu_exclusive_monitor #( parameter ccu_config_t ccuCfg = '{default: '0}, - parameter type ccu_ace_ar_t = logic, - parameter type ccu_ace_r_t = logic + parameter type ccu_ace_ar_t = logic, + parameter type ccu_ace_r_t = logic ) ( - input logic clk_i, - input logic rst_ni, - - input logic [ccuCfg.u.numSubordinates-1:0] dealloc_i, - output logic [ccuCfg.u.numSubordinates-1:0] sc_fail_o, - - output logic [ccuCfg.u.numSubordinates-1:0] r_id_hit_o, - - input ccu_ace_ar_t [ccuCfg.u.numSubordinates-1:0] ar_i, - input logic 
[ccuCfg.u.numSubordinates-1:0] ar_valid_i, - output logic [ccuCfg.u.numSubordinates-1:0] ar_ready_o, - output ccu_ace_r_t [ccuCfg.u.numSubordinates-1:0] r_o, - output logic [ccuCfg.u.numSubordinates-1:0] r_valid_o, - input logic [ccuCfg.u.numSubordinates-1:0] r_ready_i, - - output ccu_ace_ar_t [ccuCfg.u.numSubordinates-1:0] ar_o, - output logic [ccuCfg.u.numSubordinates-1:0] ar_valid_o, - input logic [ccuCfg.u.numSubordinates-1:0] ar_ready_i, - input ccu_ace_r_t [ccuCfg.u.numSubordinates-1:0] r_i, - input logic [ccuCfg.u.numSubordinates-1:0] r_valid_i, - output logic [ccuCfg.u.numSubordinates-1:0] r_ready_o + input logic clk_i, + input logic rst_ni, + + input logic [ccuCfg.u.numSubordinates-1:0] dealloc_i, + output logic [ccuCfg.u.numSubordinates-1:0] lock_o, + output logic [ccuCfg.u.numSubordinates-1:0] + [ccuCfg.u.axiSubordinateIdWidth-1:0] entry_id_o, + output logic sc_fail_o, + + input ccu_ace_ar_t ar_i, + input logic ar_valid_i, + output logic ar_ready_o, + + output ccu_ace_ar_t ar_o, + output logic ar_valid_o, + input logic ar_ready_i, + + input ccu_ace_r_t r_i, + input logic r_valid_i, + output logic r_ready_o, + + output ccu_ace_r_t r_o, + output logic r_valid_o, + input logic r_ready_i ); typedef struct packed { @@ -52,178 +54,167 @@ typedef struct packed { } exclusive_monitor_entry_t; typedef struct packed { - logic [ccuCfg.u.axiSubordinateIdWidth-1:0] id; - logic [ccuCfg.u.axiUserWidth-1:0] user; + logic [ccuCfg.axiCcuIdWidth-1:0] id; + logic [ccuCfg.u.axiUserWidth-1:0] user; } r_register_entry_t; -exclusive_monitor_entry_t [ccuCfg.u.numSubordinates-1:0] entry_q; -exclusive_monitor_entry_t [ccuCfg.u.numSubordinates-1:0] entry_d; -logic [ccuCfg.u.numSubordinates-1:0] valid_q; -logic [ccuCfg.u.numSubordinates-1:0] valid_d; -logic [ccuCfg.u.numSubordinates-1:0] lock_q; -logic [ccuCfg.u.numSubordinates-1:0] lock_d; - -logic [ccuCfg.u.numSubordinates-1:0] exclusive_store_pass; - -for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_entry - logic 
is_exclusive_sequence; - logic is_exclusive_load; - logic is_exclusive_store; - logic reservation_set; - logic reservation_reset; - logic exclusive_store_will_fail; - - r_register_entry_t r_register_wdata; - r_register_entry_t r_register_rdata; - logic r_register_valid; - logic r_register_ready; - logic r_ack_valid; - logic r_ack_ready; - ccu_ace_r_t r_ack; - - logic ar_valid; - logic ar_ready; - - assign r_id_hit_o[s] = entry_q[s].id == r_o[s].id; - - assign is_exclusive_load = - ace_ar_is_exclusive_load ( - ar_i[s].bar[0], - ar_i[s].domain, - ar_i[s].snoop, - ar_i[s].lock - ); - - assign is_exclusive_store = - ace_ar_is_exclusive_store( - ar_i[s].bar[0], - ar_i[s].domain, - ar_i[s].snoop, - ar_i[s].lock - ); - - assign is_exclusive_sequence = is_exclusive_store || is_exclusive_load; - - assign reservation_set = ar_valid_i[s] && ar_ready_o[s] && is_exclusive_sequence; - assign reservation_reset = |exclusive_store_pass && !exclusive_store_pass[s]; - assign exclusive_store_pass[s] = ar_valid_i[s] && ar_ready_o[s] && is_exclusive_store && valid_q[s]; - assign exclusive_store_will_fail = !valid_q[s] && is_exclusive_store; - - always_comb begin - entry_d[s] = entry_q[s]; - valid_d[s] = valid_q[s]; - lock_d [s] = lock_q [s]; +exclusive_monitor_entry_t [ccuCfg.u.numSubordinates-1:0] entry_q, entry_d; +logic [ccuCfg.u.numSubordinates-1:0] valid_q, valid_d; +logic [ccuCfg.u.numSubordinates-1:0] lock_q, lock_d; + +logic [ccuCfg.subordinateIndexWidth-1:0] ar_sub_idx; +assign ar_sub_idx = ar_i.id[ccuCfg.axiCcuIdWidth-1-:ccuCfg.subordinateIndexWidth]; + +logic is_exclusive_load; +logic is_exclusive_store; +logic is_exclusive_sequence; +logic exclusive_store_will_fail; + +assign is_exclusive_load = ace_ar_is_exclusive_load( + ar_i.bar[0], ar_i.domain, ar_i.snoop, ar_i.lock +); + +assign is_exclusive_store = ace_ar_is_exclusive_store( + ar_i.bar[0], ar_i.domain, ar_i.snoop, ar_i.lock +); + +assign is_exclusive_sequence = is_exclusive_load || is_exclusive_store; +assign 
exclusive_store_will_fail = is_exclusive_store && !valid_q[ar_sub_idx]; + +logic ar_handshake; +assign ar_handshake = ar_valid_i && ar_ready_o; + +logic exclusive_store_pass; +assign exclusive_store_pass = ar_handshake && is_exclusive_store && valid_q[ar_sub_idx]; + +logic reservation_set; +assign reservation_set = ar_handshake && is_exclusive_sequence; +always_comb begin + entry_d = entry_q; + valid_d = valid_q; + lock_d = lock_q; + + for (int s = 0; s < ccuCfg.u.numSubordinates; s++) begin if (dealloc_i[s]) begin lock_d[s] = 1'b0; - end else if (exclusive_store_pass[s]) begin + end else if (exclusive_store_pass && + ccuCfg.subordinateIndexWidth'(s) == ar_sub_idx) begin lock_d[s] = 1'b1; - end else if (reservation_reset) begin + end else if (exclusive_store_pass && + ccuCfg.subordinateIndexWidth'(s) != ar_sub_idx) begin valid_d[s] = 1'b0; - end else if (reservation_set) begin + end else if (reservation_set && + ccuCfg.subordinateIndexWidth'(s) == ar_sub_idx) begin valid_d[s] = 1'b1; - entry_d[s].id = ar_i[s].id; + entry_d[s].id = ar_i.id[ccuCfg.u.axiSubordinateIdWidth-1:0]; end end +end - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - entry_q[s] <= '0; - valid_q[s] <= 1'b0; - lock_q [s] <= 1'b0; - end else begin - entry_q[s] <= entry_d[s]; - valid_q[s] <= valid_d[s]; - lock_q [s] <= lock_d [s]; - end +always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + entry_q <= '0; + valid_q <= '0; + lock_q <= '0; + end else begin + entry_q <= entry_d; + valid_q <= valid_d; + lock_q <= lock_d; end +end - assign ar_valid = ar_valid_i[s] && - !(is_exclusive_sequence && |lock_q && !lock_q[s]); - assign ar_ready_o[s] = ar_ready && - !(is_exclusive_sequence && |lock_q && !lock_q[s]); - - stream_demux #( - .N_OUP (2) - ) u_ar_demux ( - .inp_valid_i (ar_valid), - .inp_ready_o (ar_ready), - .oup_sel_i (exclusive_store_will_fail), - .oup_valid_o ({r_register_valid, ar_valid_o[s]}), - .oup_ready_i ({r_register_ready, ar_ready_i[s]}) - ); - 
- `ACE_ASSIGN_AR_STRUCT(ar_o[s], ar_i[s]) - - assign r_register_wdata = '{ - id: ar_i[s].id, - user: ar_i[s].user - }; - - stream_register #( - .T (r_register_entry_t) - ) u_r_register ( - .clk_i, - .rst_ni, - .clr_i (1'b0), - .testmode_i (1'b0), - .valid_i (r_register_valid), - .ready_o (r_register_ready), - .data_i (r_register_wdata), - .valid_o (r_ack_valid), - .ready_i (r_ack_ready), - .data_o (r_register_rdata) - ); - - assign r_ack = '{ - id: r_register_rdata.id, - data: r_i[s].data, // Don't care - resp: {2'b00, axi_pkg::RESP_OKAY}, - last: 1'b1, - user: r_register_rdata.user - }; - - logic [1:0] r_arbiter_valid; - logic [1:0] r_arbiter_ready; - logic [1:0] mask_d; - logic [1:0] mask_q; - - assign r_arbiter_valid = {r_ack_valid, r_valid_i[s]} & ~mask_q; - assign {r_ack_ready, r_ready_o[s]} = r_arbiter_ready & ~mask_q; - - always_comb begin : mask_comb - mask_d = mask_q; - if (r_valid_o[s] && r_ready_i[s] && r_o[s].last) - mask_d = '0; - else if (r_valid_o[s] && r_ready_i[s]) - mask_d = ~(r_arbiter_valid & r_arbiter_ready); - end +assign lock_o = lock_q; - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) mask_q <= '0; - else mask_q <= mask_d; - end +for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_entry_id + assign entry_id_o[s] = entry_q[s].id; +end + +logic r_register_valid, r_register_ready; + +stream_demux #( + .N_OUP (2) +) u_ar_demux ( + .inp_valid_i (ar_valid_i), + .inp_ready_o (ar_ready_o), + .oup_sel_i (exclusive_store_will_fail), + .oup_valid_o ({r_register_valid, ar_valid_o}), + .oup_ready_i ({r_register_ready, ar_ready_i}) +); - rr_arb_tree #( - .NumIn (2), - .DataType (ccu_ace_r_t), - .ExtPrio (1'b0), - .AxiVldRdy (1'b1), - .LockIn (1'b1), - .FairArb (1'b1) - ) u_r_arbiter ( - .clk_i, - .rst_ni, - .flush_i (1'b0), - .rr_i ('0), - .req_i (r_arbiter_valid), - .gnt_o (r_arbiter_ready), - .data_i ({r_ack, r_i[s]}), - .req_o (r_valid_o[s]), - .gnt_i (r_ready_i[s]), - .data_o (r_o[s]), - .idx_o (sc_fail_o[s]) - ); 
+`ACE_ASSIGN_AR_STRUCT(ar_o, ar_i) + +r_register_entry_t r_register_wdata, r_register_rdata; +logic r_ack_valid, r_ack_ready; +ccu_ace_r_t r_ack; + +assign r_register_wdata = '{ + id: ar_i.id, + user: ar_i.user +}; + +stream_register #( + .T (r_register_entry_t) +) u_r_register ( + .clk_i, + .rst_ni, + .clr_i (1'b0), + .testmode_i (1'b0), + .valid_i (r_register_valid), + .ready_o (r_register_ready), + .data_i (r_register_wdata), + .valid_o (r_ack_valid), + .ready_i (r_ack_ready), + .data_o (r_register_rdata) +); + +assign r_ack = '{ + id: r_register_rdata.id, + data: '0, + resp: {2'b00, axi_pkg::RESP_OKAY}, + last: 1'b1, + user: r_register_rdata.user +}; + +logic [1:0] r_arbiter_valid; +logic [1:0] r_arbiter_ready; +logic [1:0] mask_d, mask_q; + +assign r_arbiter_valid = {r_ack_valid, r_valid_i} & ~mask_q; +assign {r_ack_ready, r_ready_o} = r_arbiter_ready & ~mask_q; + +always_comb begin : mask_comb + mask_d = mask_q; + if (r_valid_o && r_ready_i && r_o.last) + mask_d = '0; + else if (r_valid_o && r_ready_i) + mask_d = ~(r_arbiter_valid & r_arbiter_ready); +end + +always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) mask_q <= '0; + else mask_q <= mask_d; end +rr_arb_tree #( + .NumIn (2), + .DataType (ccu_ace_r_t), + .ExtPrio (1'b0), + .AxiVldRdy (1'b1), + .LockIn (1'b1), + .FairArb (1'b1) +) u_r_arbiter ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .rr_i ('0), + .req_i (r_arbiter_valid), + .gnt_o (r_arbiter_ready), + .data_i ({r_ack, r_i}), + .req_o (r_valid_o), + .gnt_i (r_ready_i), + .data_o (r_o), + .idx_o (sc_fail_o) +); + endmodule diff --git a/src/ccu/ccu_frontend.sv b/src/ccu/ccu_frontend.sv index 5a99972..0b42a62 100644 --- a/src/ccu/ccu_frontend.sv +++ b/src/ccu/ccu_frontend.sv @@ -63,31 +63,43 @@ module ccu_frontend logic exclusive; } rack_fifo_entry_t; + typedef struct packed { + ccu_ace_manager_r_t r; + logic sc_fail; + } r_spill_entry_t; + ccu_ace_subordinate_req_t [ccuCfg.u.numSubordinates-1:0] subordinate_req; ccu_ace_subordinate_resp_t 
[ccuCfg.u.numSubordinates-1:0] subordinate_resp; - ccu_ace_subordinate_ar_t [ccuCfg.u.numSubordinates-1:0] subordinate_ar; - logic [ccuCfg.u.numSubordinates-1:0] subordinate_ar_valid; - logic [ccuCfg.u.numSubordinates-1:0] subordinate_ar_ready; - ccu_ace_subordinate_r_t [ccuCfg.u.numSubordinates-1:0] subordinate_r; - logic [ccuCfg.u.numSubordinates-1:0] subordinate_r_valid; - logic [ccuCfg.u.numSubordinates-1:0] subordinate_r_ready; + ccu_ace_manager_req_t arbiter_req; + ccu_ace_manager_resp_t arbiter_resp; - ccu_ace_subordinate_ar_t [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_ar; - logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_ar_valid; - logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_ar_ready; - ccu_ace_subordinate_r_t [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_r; - logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_r_valid; - logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_r_ready; + logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_lock; + logic [ccuCfg.u.numSubordinates-1:0][ccuCfg.u.axiSubordinateIdWidth-1:0] exclusive_monitor_entry_id; + logic exclusive_monitor_sc_fail; + logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_dealloc; + // Exclusive monitor AR/R (post AR-spill, pre manager) + ccu_ace_manager_ar_t exclusive_monitor_ar_in; + logic exclusive_monitor_ar_valid_in; + logic exclusive_monitor_ar_ready_in; - logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_id_hit; - logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_dealloc; - logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_sc_fail; + // Exclusive monitor R output (pre R-spill) + ccu_ace_manager_r_t exclusive_monitor_r; + logic exclusive_monitor_r_valid; + logic exclusive_monitor_r_ready; + + r_spill_entry_t r_spill_in; + r_spill_entry_t r_spill_out; + logic r_spill_valid_out; + logic r_spill_ready_out; // Per-subordinate logic // {{{ - for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_subordinate_monitor + for (genvar s = 
0; s < ccuCfg.u.numSubordinates; s++) begin : gen_subordinate + + logic is_exclusive_sequence; + logic lock_stall; logic rack_fifo_full; rack_fifo_entry_t rack_fifo_wdata; @@ -95,57 +107,44 @@ module ccu_frontend logic rack_fifo_push; logic rack_fifo_pop; - always_comb begin : ar_comb - // Input request --> exclusive monitor - `ACE_SET_AR_STRUCT(subordinate_ar[s], subordinate_req_i[s].ar) - subordinate_ar_valid[s] = subordinate_req_i[s].ar_valid; - subordinate_resp_o[s].ar_ready = subordinate_ar_ready[s]; - - // Exclusive monitor --> mux - `ACE_SET_AR_STRUCT(subordinate_req[s].ar, exclusive_monitor_ar[s]) - subordinate_req[s].ar_valid = exclusive_monitor_ar_valid[s]; - exclusive_monitor_ar_ready[s] = subordinate_resp[s].ar_ready; - end - - always_comb begin : r_comb - // Input request <-- exclusive monitor - `ACE_SET_R_STRUCT(subordinate_resp_o[s].r, subordinate_r[s]) - subordinate_resp_o[s].r_valid = subordinate_r_valid[s]; - subordinate_r_ready[s] = subordinate_req_i[s].r_ready; - - // Exclusive monitor <-- mux - `ACE_SET_R_STRUCT(exclusive_monitor_r[s], subordinate_resp[s].r) - exclusive_monitor_r_valid[s] = subordinate_resp[s].r_valid; - subordinate_req[s].r_ready = exclusive_monitor_r_ready[s]; - - // Stall R responses once the RACK fifo is full - if (rack_fifo_full) begin - exclusive_monitor_r_valid[s] = 1'b0; - subordinate_req[s].r_ready = 1'b0; - end - end - - always_comb begin : aw_comb - // Input request --> mux - `ACE_SET_AW_STRUCT(subordinate_req[s].aw, subordinate_req_i[s].aw) - subordinate_req[s].aw_valid = subordinate_req_i[s].aw_valid; - subordinate_resp_o[s].aw_ready = subordinate_resp[s].aw_ready; - end - - always_comb begin : w_comb - // Input request --> mux - `AXI_SET_W_STRUCT(subordinate_req[s].w, subordinate_req_i[s].w) - subordinate_req[s].w_valid = subordinate_req_i[s].w_valid; - subordinate_resp_o[s].w_ready = subordinate_resp[s].w_ready; - end - - always_comb begin : b_comb - // Input request <-- mux - 
`AXI_SET_B_STRUCT(subordinate_resp_o[s].b, subordinate_resp[s].b) - subordinate_resp_o[s].b_valid = subordinate_resp[s].b_valid; - subordinate_req[s].b_ready = subordinate_req_i[s].b_ready; - end - + logic r_id_hit; + + assign is_exclusive_sequence = + ace_ar_is_exclusive_load ( + subordinate_req_i[s].ar.bar[0], + subordinate_req_i[s].ar.domain, + subordinate_req_i[s].ar.snoop, + subordinate_req_i[s].ar.lock + ) || + ace_ar_is_exclusive_store( + subordinate_req_i[s].ar.bar[0], + subordinate_req_i[s].ar.domain, + subordinate_req_i[s].ar.snoop, + subordinate_req_i[s].ar.lock + ); + + assign lock_stall = is_exclusive_sequence && + |exclusive_monitor_lock && !exclusive_monitor_lock[s]; + + `ACE_ASSIGN_AR_STRUCT(subordinate_req[s].ar, subordinate_req_i[s].ar) + assign subordinate_req[s].ar_valid = subordinate_req_i[s].ar_valid && !lock_stall; + assign subordinate_resp_o[s].ar_ready = subordinate_resp[s].ar_ready && !lock_stall; + + `ACE_ASSIGN_R_STRUCT(subordinate_resp_o[s].r, subordinate_resp[s].r) + assign subordinate_resp_o[s].r_valid = subordinate_resp[s].r_valid && !rack_fifo_full; + assign subordinate_req[s].r_ready = subordinate_req_i[s].r_ready && !rack_fifo_full; + + `ACE_ASSIGN_AW_STRUCT(subordinate_req[s].aw, subordinate_req_i[s].aw) + assign subordinate_req[s].aw_valid = subordinate_req_i[s].aw_valid; + assign subordinate_resp_o[s].aw_ready = subordinate_resp[s].aw_ready; + + `AXI_ASSIGN_W_STRUCT(subordinate_req[s].w, subordinate_req_i[s].w) + assign subordinate_req[s].w_valid = subordinate_req_i[s].w_valid; + assign subordinate_resp_o[s].w_ready = subordinate_resp[s].w_ready; + + `AXI_ASSIGN_B_STRUCT(subordinate_resp_o[s].b, subordinate_resp[s].b) + assign subordinate_resp_o[s].b_valid = subordinate_resp[s].b_valid; + assign subordinate_req[s].b_ready = subordinate_req_i[s].b_ready; // The xACK signal is used to extend the lifetime of // a transaction beyond the last R handshake. 
@@ -161,10 +160,12 @@ module ccu_frontend // - clear the corresponding exclusive monitor entry // SC failure responses are locally generated, thus no entry should be cleared // once the RACK arrives + assign r_id_hit = exclusive_monitor_entry_id[s] == subordinate_resp_o[s].r.id; + assign rack_fifo_wdata = '{ tid: scoreboard_dealloc_entry_i, - dealloc: scoreboard_dealloc_hit_i && !exclusive_monitor_sc_fail[s], - exclusive: exclusive_monitor_id_hit[s] && !exclusive_monitor_sc_fail[s] + dealloc: scoreboard_dealloc_hit_i && !r_spill_out.sc_fail, + exclusive: r_id_hit && !r_spill_out.sc_fail }; assign rack_fifo_pop = subordinate_rack_i[s]; @@ -191,31 +192,6 @@ module ccu_frontend assign scoreboard_dealloc_entry_o[s] = rack_fifo_rdata.tid; assign exclusive_monitor_dealloc[s] = subordinate_rack_i[s] && rack_fifo_rdata.exclusive; end - - // ACE exclusive monitor as prescribed in the specs - ccu_exclusive_monitor #( - .ccuCfg (ccuCfg), - .ccu_ace_ar_t (ccu_ace_subordinate_ar_t), - .ccu_ace_r_t (ccu_ace_subordinate_r_t) - ) u_ccu_exclusive_monitor ( - .clk_i, - .rst_ni, - .dealloc_i (exclusive_monitor_dealloc), - .sc_fail_o (exclusive_monitor_sc_fail), - .r_id_hit_o (exclusive_monitor_id_hit), - .ar_i (subordinate_ar), - .ar_valid_i (subordinate_ar_valid), - .ar_ready_o (subordinate_ar_ready), - .r_o (subordinate_r), - .r_valid_o (subordinate_r_valid), - .r_ready_i (subordinate_r_ready), - .ar_o (exclusive_monitor_ar), - .ar_valid_o (exclusive_monitor_ar_valid), - .ar_ready_i (exclusive_monitor_ar_ready), - .r_i (exclusive_monitor_r), - .r_valid_i (exclusive_monitor_r_valid), - .r_ready_o (exclusive_monitor_r_ready) - ); // }}} // Point of Serialization (PoS) @@ -245,15 +221,126 @@ module ccu_frontend .rst_ni, .subordinate_req_i (subordinate_req), .subordinate_resp_o (subordinate_resp), - .manager_req_o (manager_req_o), - .manager_resp_i (manager_resp_i) + .manager_req_o (arbiter_req), + .manager_resp_i (arbiter_resp) ); // }}} - // Scoreboard dealloc check + // Per-channel 
spill registers + // {{{ + spill_register #( + .T (ccu_ace_manager_ar_t), + .Bypass (!ccuCfg.u.frontendPipeAr) + ) u_ar_spill ( + .clk_i, + .rst_ni, + .valid_i (arbiter_req.ar_valid), + .ready_o (arbiter_resp.ar_ready), + .data_i (arbiter_req.ar), + .valid_o (exclusive_monitor_ar_valid_in), + .ready_i (exclusive_monitor_ar_ready_in), + .data_o (exclusive_monitor_ar_in) + ); + + spill_register #( + .T (ccu_ace_manager_aw_t), + .Bypass (!ccuCfg.u.frontendPipeAw) + ) u_aw_spill ( + .clk_i, + .rst_ni, + .valid_i (arbiter_req.aw_valid), + .ready_o (arbiter_resp.aw_ready), + .data_i (arbiter_req.aw), + .valid_o (manager_req_o.aw_valid), + .ready_i (manager_resp_i.aw_ready), + .data_o (manager_req_o.aw) + ); + + spill_register #( + .T (ccu_w_t), + .Bypass (!ccuCfg.u.frontendPipeW) + ) u_w_spill ( + .clk_i, + .rst_ni, + .valid_i (arbiter_req.w_valid), + .ready_o (arbiter_resp.w_ready), + .data_i (arbiter_req.w), + .valid_o (manager_req_o.w_valid), + .ready_i (manager_resp_i.w_ready), + .data_o (manager_req_o.w) + ); + + spill_register #( + .T (ccu_ace_manager_b_t), + .Bypass (!ccuCfg.u.frontendPipeB) + ) u_b_spill ( + .clk_i, + .rst_ni, + .valid_i (manager_resp_i.b_valid), + .ready_o (manager_req_o.b_ready), + .data_i (manager_resp_i.b), + .valid_o (arbiter_resp.b_valid), + .ready_i (arbiter_req.b_ready), + .data_o (arbiter_resp.b) + ); + + // R channel: wrap R data + sc_fail through the spill register + assign r_spill_in = '{ + r: exclusive_monitor_r, + sc_fail: exclusive_monitor_sc_fail + }; + + spill_register #( + .T (r_spill_entry_t), + .Bypass (!ccuCfg.u.frontendPipeR) + ) u_r_spill ( + .clk_i, + .rst_ni, + .valid_i (exclusive_monitor_r_valid), + .ready_o (exclusive_monitor_r_ready), + .data_i (r_spill_in), + .valid_o (r_spill_valid_out), + .ready_i (r_spill_ready_out), + .data_o (r_spill_out) + ); + + `ACE_ASSIGN_R_STRUCT(arbiter_resp.r, r_spill_out.r) + assign arbiter_resp.r_valid = r_spill_valid_out; + assign r_spill_ready_out = arbiter_req.r_ready; + // }}} + + // 
ACE exclusive monitor + // {{{ + ccu_exclusive_monitor #( + .ccuCfg (ccuCfg), + .ccu_ace_ar_t (ccu_ace_manager_ar_t), + .ccu_ace_r_t (ccu_ace_manager_r_t) + ) u_ccu_exclusive_monitor ( + .clk_i, + .rst_ni, + .dealloc_i (exclusive_monitor_dealloc), + .lock_o (exclusive_monitor_lock), + .entry_id_o (exclusive_monitor_entry_id), + .sc_fail_o (exclusive_monitor_sc_fail), + .ar_i (exclusive_monitor_ar_in), + .ar_valid_i (exclusive_monitor_ar_valid_in), + .ar_ready_o (exclusive_monitor_ar_ready_in), + .ar_o (manager_req_o.ar), + .ar_valid_o (manager_req_o.ar_valid), + .ar_ready_i (manager_resp_i.ar_ready), + .r_i (manager_resp_i.r), + .r_valid_i (manager_resp_i.r_valid), + .r_ready_o (manager_req_o.r_ready), + .r_o (exclusive_monitor_r), + .r_valid_o (exclusive_monitor_r_valid), + .r_ready_i (exclusive_monitor_r_ready) + ); + // }}} + + // Scoreboard dealloc check on post-spill R (aligned with per-sub R demux) // {{{ assign scoreboard_dealloc_check_o = - manager_resp_i.r_valid && manager_req_o.r_ready && manager_resp_i.r.last; - assign scoreboard_dealloc_id_o = manager_resp_i.r.id; + r_spill_valid_out && r_spill_ready_out && r_spill_out.r.last; + assign scoreboard_dealloc_id_o = r_spill_out.r.id; // }}} endmodule diff --git a/src/ccu/ccu_top.sv b/src/ccu/ccu_top.sv index 68a007d..ffe8c51 100644 --- a/src/ccu/ccu_top.sv +++ b/src/ccu/ccu_top.sv @@ -136,35 +136,9 @@ ccu_snoop_pipeline_events_t perf_events; // Frontend // {{{ // The frontend acts as the Point of Serialization (PoS) - ccu_ace_subordinate_req_t [ccuCfg.u.numSubordinates-1:0] subordinate_cut_req; - ccu_ace_subordinate_resp_t [ccuCfg.u.numSubordinates-1:0] subordinate_cut_resp; ccu_ace_req_t frontend_req; ccu_ace_resp_t frontend_resp; - for (genvar s = 0 ; s < ccuCfg.u.numSubordinates; s++) begin : gen_subordinate_cut - axi_cut #( - .BypassAw (!ccuCfg.u.frontendPipeAw), - .BypassW (!ccuCfg.u.frontendPipeW), - .BypassB (!ccuCfg.u.frontendPipeB), - .BypassAr (!ccuCfg.u.frontendPipeAr), - .BypassR 
(!ccuCfg.u.frontendPipeR), - .aw_chan_t (ccu_ace_subordinate_aw_t), - .w_chan_t (ccu_w_t), - .b_chan_t (ccu_ace_subordinate_b_t), - .ar_chan_t (ccu_ace_subordinate_ar_t), - .r_chan_t (ccu_ace_subordinate_r_t), - .axi_req_t (ccu_ace_subordinate_req_t), - .axi_resp_t (ccu_ace_subordinate_resp_t) - ) u_subordinate_cut ( - .clk_i, - .rst_ni, - .slv_req_i (subordinate_req_i[s]), - .slv_resp_o (subordinate_resp_o[s]), - .mst_req_o (subordinate_cut_req[s]), - .mst_resp_i (subordinate_cut_resp[s]) - ); - end - ccu_frontend #( .ccuCfg (ccuCfg), .ccu_ace_manager_ar_t (ccu_ace_ar_t), @@ -183,8 +157,8 @@ ccu_snoop_pipeline_events_t perf_events; ) u_ccu_frontend ( .clk_i, .rst_ni, - .subordinate_req_i (subordinate_cut_req), - .subordinate_resp_o (subordinate_cut_resp), + .subordinate_req_i (subordinate_req_i), + .subordinate_resp_o (subordinate_resp_o), .subordinate_rack_i (subordinate_rack_i), .subordinate_wack_i (subordinate_wack_i), .manager_req_o (frontend_req), From 437fddbf173b432366e546ec80266f0ec8f5bf87 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Tue, 19 May 2026 15:59:28 +0200 Subject: [PATCH 108/109] ccu_csr: update generated code with peakrdl v1.5.0 --- src/ccu/ccu_csr_wrap.sv | 3 -- src/ccu/regs/ccu_csr.rdl | 4 +-- src/ccu/regs/generated/ccu_csr.sv | 44 ++++++++++++++++----------- src/ccu/regs/generated/ccu_csr_pkg.sv | 2 ++ 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/src/ccu/ccu_csr_wrap.sv b/src/ccu/ccu_csr_wrap.sv index 29c7557..46eefd4 100644 --- a/src/ccu/ccu_csr_wrap.sv +++ b/src/ccu/ccu_csr_wrap.sv @@ -30,9 +30,6 @@ module ccu_csr_wrap input logic [numEvents-1:0] events_i ); - localparam int unsigned numPerfCounters = - $bits(ccu_csr__perf_countinhibit_r__inh__out_t); - ccu_csr__in_t hwif_in; ccu_csr__out_t hwif_out; diff --git a/src/ccu/regs/ccu_csr.rdl b/src/ccu/regs/ccu_csr.rdl index 92ecacd..592c363 100644 --- a/src/ccu/regs/ccu_csr.rdl +++ b/src/ccu/regs/ccu_csr.rdl @@ -50,8 +50,8 @@ addrmap ccu_csr #( perf_countinhibit_r 
perf_countinhibit @ 0x00; // Array of Event Selectors @ 0x40 - perf_eventsel_r perf_eventsel[numPerfCounters] @ 0x40 += 0x4; + perf_eventsel_r perf_eventsel[numPerfCounters] @ 0x40; // Array of 32-bit Counters @ 0xC0 - perf_counter_r perf_counter[numPerfCounters] @ 0xC0 += 0x4; + perf_counter_r perf_counter[numPerfCounters] @ 0xC0; }; diff --git a/src/ccu/regs/generated/ccu_csr.sv b/src/ccu/regs/generated/ccu_csr.sv index 3ba3f21..577beaa 100644 --- a/src/ccu/regs/generated/ccu_csr.sv +++ b/src/ccu/regs/generated/ccu_csr.sv @@ -92,12 +92,18 @@ module ccu_csr ( logic perf_counter[32]; } decoded_reg_strb_t; decoded_reg_strb_t decoded_reg_strb; + logic decoded_err; + logic [8:0] decoded_addr; logic decoded_req; logic decoded_req_is_wr; logic [31:0] decoded_wr_data; logic [31:0] decoded_wr_biten; always_comb begin + automatic logic is_valid_addr; + automatic logic is_valid_rw; + is_valid_addr = '1; // No valid address check + is_valid_rw = '1; // No valid RW check decoded_reg_strb.perf_countinhibit = cpuif_req_masked & (cpuif_addr == 9'h0); for(int i0=0; i0<32; i0++) begin decoded_reg_strb.perf_eventsel[i0] = cpuif_req_masked & (cpuif_addr == 9'h40 + (9)'(i0) * 9'h4); @@ -105,9 +111,11 @@ module ccu_csr ( for(int i0=0; i0<32; i0++) begin decoded_reg_strb.perf_counter[i0] = cpuif_req_masked & (cpuif_addr == 9'hc0 + (9)'(i0) * 9'h4); end + decoded_err = '0; end // Pass down signals to next stage + assign decoded_addr = cpuif_addr; assign decoded_req = cpuif_req_masked; assign decoded_req_is_wr = cpuif_req_is_wr; assign decoded_wr_data = cpuif_wr_data; @@ -219,7 +227,7 @@ module ccu_csr ( load_next_c = '1; end if(hwif_in.perf_counter[i0].val.incr) begin // increment - field_combo.perf_counter[i0].val.overflow = (((33)'(next_c) + 32'h1) > 32'hffffffff); + field_combo.perf_counter[i0].val.overflow = (((33)'(next_c) + 32'h1) > 33'hffffffff); next_c = next_c + 32'h1; load_next_c = '1; end else begin @@ -252,29 +260,31 @@ module ccu_csr ( // Readback 
//-------------------------------------------------------------------------- + logic [8:0] rd_mux_addr; + assign rd_mux_addr = decoded_addr; + logic readback_err; logic readback_done; logic [31:0] readback_data; - - // Assign readback values to a flattened array - logic [31:0] readback_array[65]; - assign readback_array[0][31:0] = (decoded_reg_strb.perf_countinhibit && !decoded_req_is_wr) ? field_storage.perf_countinhibit.inh.value : '0; - for(genvar i0=0; i0<32; i0++) begin - assign readback_array[i0 * 1 + 1][7:0] = (decoded_reg_strb.perf_eventsel[i0] && !decoded_req_is_wr) ? field_storage.perf_eventsel[i0].event_id.value : '0; - assign readback_array[i0 * 1 + 1][31:8] = '0; - end - for(genvar i0=0; i0<32; i0++) begin - assign readback_array[i0 * 1 + 33][31:0] = (decoded_reg_strb.perf_counter[i0] && !decoded_req_is_wr) ? field_storage.perf_counter[i0].val.value : '0; - end - - // Reduce the array always_comb begin automatic logic [31:0] readback_data_var; - readback_done = decoded_req & ~decoded_req_is_wr; - readback_err = '0; readback_data_var = '0; - for(int i=0; i<65; i++) readback_data_var |= readback_array[i]; + if(rd_mux_addr == 9'h0) begin + readback_data_var[31:0] = field_storage.perf_countinhibit.inh.value; + end + for(int i0=0; i0<32; i0++) begin + if(rd_mux_addr == 9'h40 + (9)'(i0) * 9'h4) begin + readback_data_var[7:0] = field_storage.perf_eventsel[i0].event_id.value; + end + end + for(int i0=0; i0<32; i0++) begin + if(rd_mux_addr == 9'hc0 + (9)'(i0) * 9'h4) begin + readback_data_var[31:0] = field_storage.perf_counter[i0].val.value; + end + end readback_data = readback_data_var; + readback_done = decoded_req & ~decoded_req_is_wr; + readback_err = '0; end assign cpuif_rd_ack = readback_done; diff --git a/src/ccu/regs/generated/ccu_csr_pkg.sv b/src/ccu/regs/generated/ccu_csr_pkg.sv index 0cdd97e..a807f05 100644 --- a/src/ccu/regs/generated/ccu_csr_pkg.sv +++ b/src/ccu/regs/generated/ccu_csr_pkg.sv @@ -5,6 +5,8 @@ package ccu_csr_pkg; localparam 
CCU_CSR_DATA_WIDTH = 32; localparam CCU_CSR_MIN_ADDR_WIDTH = 9; + localparam CCU_CSR_SIZE = 'h140; + localparam numPerfCounters = 'h20; typedef struct { logic incr; From a55a6e432237ef7fec4dcb18c0b27ecd197a29e6 Mon Sep 17 00:00:00 2001 From: Riccardo Tedeschi Date: Tue, 19 May 2026 15:59:57 +0200 Subject: [PATCH 109/109] ccu_frontend: fix exclusive dealloc --- src/ccu/ccu_frontend.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ccu/ccu_frontend.sv b/src/ccu/ccu_frontend.sv index 0b42a62..b137950 100644 --- a/src/ccu/ccu_frontend.sv +++ b/src/ccu/ccu_frontend.sv @@ -164,8 +164,8 @@ module ccu_frontend assign rack_fifo_wdata = '{ tid: scoreboard_dealloc_entry_i, - dealloc: scoreboard_dealloc_hit_i && !r_spill_out.sc_fail, - exclusive: r_id_hit && !r_spill_out.sc_fail + dealloc: scoreboard_dealloc_hit_i && !r_spill_out.sc_fail, + exclusive: r_id_hit && exclusive_monitor_lock[s] && !r_spill_out.sc_fail }; assign rack_fifo_pop = subordinate_rack_i[s];