From 72e1fa71a8ac696cddb70ff1f3014f554fc673f7 Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Sat, 17 Feb 2024 12:32:41 +0100 Subject: [PATCH] Fix SV lint. --- rtl/redmule_castin.sv | 16 ++--- rtl/redmule_castout.sv | 12 ++-- rtl/redmule_ce.sv | 2 +- rtl/redmule_complex.sv | 57 +++++++++------- rtl/redmule_ctrl.sv | 64 ++++++++++-------- rtl/redmule_engine.sv | 26 ++++---- rtl/redmule_inst_decoder.sv | 13 ++-- rtl/redmule_pkg.sv | 6 +- rtl/redmule_row.sv | 18 ++--- rtl/redmule_scheduler.sv | 130 +++++++++++++++++++++--------------- rtl/redmule_streamer.sv | 10 +-- rtl/redmule_top.sv | 24 +++---- rtl/redmule_w_buffer.sv | 22 +++--- rtl/redmule_wrap.sv | 2 +- rtl/redmule_x_buffer.sv | 20 +++--- rtl/redmule_z_buffer.sv | 28 ++++---- 16 files changed, 246 insertions(+), 204 deletions(-) diff --git a/rtl/redmule_castin.sv b/rtl/redmule_castin.sv index 6252be4..cf1c46d 100644 --- a/rtl/redmule_castin.sv +++ b/rtl/redmule_castin.sv @@ -12,7 +12,7 @@ import redmule_pkg::*; module redmule_castin #( parameter fpnew_pkg::fmt_logic_t FpFmtConfig = FpFmtConfig, parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = IntFmtConfig, - parameter fpnew_pkg::fp_format_e dst_format = FPFORMAT, + parameter fpnew_pkg::fp_format_e DstFormat = FPFORMAT, parameter fpnew_pkg::operation_e Operation = CAST_OP, parameter logic Pipe = 1'b0 , localparam int unsigned BW = hci_package::DEFAULT_BW , @@ -47,10 +47,10 @@ logic [NUM_CAST-1:0][WIDTH-1:0] result , operand; generate - for (genvar i = 0; i < NUM_CAST; i++) begin : generate_cast_units + for (genvar i = 0; i < NUM_CAST; i++) begin : gen_cast_units assign operand [i] = {{ZEROBITS{1'b0}}, src_int[i*MIN_FMT+:MIN_FMT]}; - + fpnew_cast_multi #( .FpFmtConfig ( FpFmtConfig ), .IntFmtConfig ( IntFmtConfig ) @@ -63,7 +63,7 @@ generate .op_i ( Operation ), .op_mod_i ( '0 ), .src_fmt_i ( src_fmt_i ), - .dst_fmt_i ( dst_format ), + .dst_fmt_i ( DstFormat ), .int_fmt_i ( INT_SRC ), .tag_i ( '0 ), .mask_i ( '0 ), @@ -81,11 +81,11 @@ generate .out_ready_i ( '1 ), .busy_o ( ) ); - + assign dst_int [i*WIDTH+:WIDTH] = result[i]; - - end // block: generate_cast_units - + + end + endgenerate assign dst_o = cast_i ? dst_int : src_i; diff --git a/rtl/redmule_castout.sv b/rtl/redmule_castout.sv index 1ce759a..1e27e2d 100644 --- a/rtl/redmule_castout.sv +++ b/rtl/redmule_castout.sv @@ -12,7 +12,7 @@ import redmule_pkg::*; module redmule_castout #( parameter fpnew_pkg::fmt_logic_t FpFmtConfig = FpFmtConfig, parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = IntFmtConfig, - parameter fpnew_pkg::fp_format_e src_format = FPFORMAT, + parameter fpnew_pkg::fp_format_e SrcFormat = FPFORMAT, parameter fpnew_pkg::operation_e Operation = CAST_OP, parameter logic Pipe = 1'b0 , localparam int unsigned BW = hci_package::DEFAULT_BW , @@ -43,7 +43,7 @@ logic [NUM_CAST-1:0][WIDTH-1:0] result , operand; generate - for (genvar i = 0; i < NUM_CAST; i++) begin : generate_cast_units + for (genvar i = 0; i < NUM_CAST; i++) begin : gen_cast_units assign operand [i] = src_i[i*WIDTH+:WIDTH]; @@ -58,7 +58,7 @@ generate .rnd_mode_i ( fpnew_pkg::RNE ), .op_i ( Operation ), .op_mod_i ( '0 ), - .src_fmt_i ( src_format ), + .src_fmt_i ( SrcFormat ), .dst_fmt_i ( dst_fmt_i ), .int_fmt_i ( INT_SRC ), .tag_i ( '0 ), @@ -77,11 +77,11 @@ generate .out_ready_i ( '1 ), .busy_o ( ) ); - + assign res [i*MIN_FMT+:MIN_FMT] = result[i][WIDTH-MIN_FMT-1:0]; - + end - + endgenerate assign dst_int = {{DATA_W-DW_CUT{1'b0}}, res[DATA_W-DW_CUT-1:0]}; diff --git a/rtl/redmule_ce.sv b/rtl/redmule_ce.sv index cea88f0..5609092 100644 --- a/rtl/redmule_ce.sv +++ b/rtl/redmule_ce.sv @@ -73,7 +73,7 @@ cluster_clock_gating stage2_noncomp_clk_gating ( logic [NumPipeRegs-1:0][BITW-1:0] noncomp_y_q; generate - for (genvar i = 0; i < NumPipeRegs; i++) begin : noncomp_input_pipe + for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_noncomp_input_pipe always_ff @(posedge stage2_noncomp_input_pipe_clk, negedge rst_ni) begin if (~rst_ni) begin noncomp_y_q[i] <= '0; diff --git a/rtl/redmule_complex.sv b/rtl/redmule_complex.sv index caa7e57..617d009 100644 --- a/rtl/redmule_complex.sv +++ b/rtl/redmule_complex.sv @@ -15,35 +15,42 @@ module redmule_complex import hwpe_ctrl_package::*; import hwpe_stream_package::*; #( - parameter core_type_e CoreType = CV32X , // CV32E40P, IBEX, SNITCH, CVA6 - parameter int unsigned ID_WIDTH = 8 , - parameter int unsigned N_CORES = 8 , - parameter int unsigned DW = DATA_W , // TCDM port dimension (in bits) - parameter int unsigned MP = DW/redmule_pkg::MemDw, - parameter int unsigned NumIrqs = 32 , - parameter int unsigned AddrWidth = 32 , - parameter int unsigned XPulp = 0 , - parameter int unsigned FpuPresent = 0 , - parameter int unsigned Zfinx = 0 , - parameter type core_data_req_t = logic , - parameter type core_data_rsp_t = logic , - parameter type core_inst_req_t = logic , - parameter type core_inst_rsp_t = logic , - parameter type redmule_data_req_t = logic , - parameter type redmule_data_rsp_t = logic , - localparam fp_format_e FpFormat = FPFORMAT , // Data format (default is FP16) - localparam int unsigned Height = ARRAY_HEIGHT , // Number of PEs within a row - localparam int unsigned Width = ARRAY_WIDTH , // Number of parallel rows - localparam int unsigned NumPipeRegs = PIPE_REGS , // Number of pipeline registers within each PE - localparam pipe_config_t PipeConfig = DISTRIBUTED , - localparam int unsigned BITW = fp_width(FpFormat) // Number of bits for the given format + // CV32E40P, IBEX, SNITCH, CVA6 + parameter core_type_e CoreType = CV32X, + parameter int unsigned ID_WIDTH = 8, + parameter int unsigned N_CORES = 8, + // TCDM port dimension (in bits) + parameter int unsigned DW = DATA_W, + parameter int unsigned MP = DW/redmule_pkg::MemDw, + parameter int unsigned NumIrqs = 32, + parameter int unsigned AddrWidth = 32, + parameter int unsigned XPulp = 0, + parameter int unsigned FpuPresent = 0, + parameter int unsigned Zfinx = 0, + parameter type core_data_req_t = logic, + parameter type core_data_rsp_t = logic, + parameter type core_inst_req_t = logic, + parameter type core_inst_rsp_t = logic, + parameter type redmule_data_req_t = logic, + parameter type redmule_data_rsp_t = logic, + // Data format (default is FP16) + localparam fp_format_e FpFormat = FPFORMAT, + // Number of PEs within a row + localparam int unsigned Height = ARRAY_HEIGHT, + // Number of parallel rows + localparam int unsigned Width = ARRAY_WIDTH, + // Number of pipeline registers within each PE + localparam int unsigned NumPipeRegs = PIPE_REGS, + localparam pipe_config_t PipeConfig = DISTRIBUTED, + // Number of bits for the given format + localparam int unsigned BITW = fp_width(FpFormat) )( input logic clk_i , input logic rst_ni , input logic test_mode_i , input logic fetch_enable_i , - input logic [ AddrWidth-1:0] boot_addr_i , - input logic [ NumIrqs-1:0] irq_i , + input logic [ AddrWidth-1:0] boot_addr_i , + input logic [ NumIrqs-1:0] irq_i , output logic [$clog2(NumIrqs)-1:0] irq_id_o , output logic irq_ack_o , output logic core_sleep_o , @@ -62,8 +69,10 @@ logic busy; logic s_clk, s_clk_en; logic [N_CORES-1:0][1:0] evt; +// verilog_lint: waive-start line-length `HWPE_CTRL_TYPEDEF_REQ_T(redmule_ctrl_req_t, logic [31:0], logic [31:0], logic [3:0], logic [ID_WIDTH-1:0]) `HWPE_CTRL_TYPEDEF_RSP_T(redmule_ctrl_rsp_t, logic [31:0], logic [ID_WIDTH-1:0]) +// verilog_lint: waive-stop line-length core_inst_req_t core_inst_req; core_inst_rsp_t core_inst_rsp; diff --git a/rtl/redmule_ctrl.sv b/rtl/redmule_ctrl.sv index 23b091e..d202cc9 100644 --- a/rtl/redmule_ctrl.sv +++ b/rtl/redmule_ctrl.sv @@ -10,17 +10,17 @@ import redmule_pkg::*; module redmule_ctrl import hwpe_ctrl_package::*; #( -parameter int unsigned N_CORES = 8 , -parameter int unsigned IO_REGS = REDMULE_REGS , -parameter int unsigned ID_WIDTH = 8 , -parameter int unsigned SysDataWidth = 32 , -parameter int unsigned N_CONTEXT = 2 , -parameter int unsigned Height = 4 , -parameter int unsigned Width = 8 , -parameter int unsigned NumPipeRegs = 3 , -localparam int unsigned TILE = (NumPipeRegs +1)*Height, -localparam int unsigned W_ITERS = W_ITERS , -localparam int unsigned LEFT_PARAMS = LEFT_PARAMS + parameter int unsigned N_CORES = 8 , + parameter int unsigned IO_REGS = REDMULE_REGS , + parameter int unsigned ID_WIDTH = 8 , + parameter int unsigned SysDataWidth = 32 , + parameter int unsigned N_CONTEXT = 2 , + parameter int unsigned Height = 4 , + parameter int unsigned Width = 8 , + parameter int unsigned NumPipeRegs = 3 , + localparam int unsigned TILE = (NumPipeRegs +1)*Height, + localparam int unsigned W_ITERS = W_ITERS , + localparam int unsigned LEFT_PARAMS = LEFT_PARAMS )( input logic clk_i , input logic rst_ni , @@ -62,8 +62,16 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS logic [15:0] w_rows_iter, w_row_count_d, w_row_count_q; logic [15:0] z_storings_d, z_storings_q, tot_stores, issued_store_d, issued_store_q; - typedef enum logic [2:0] {REDMULE_IDLE, REDMULE_STARTING, REDMULE_COMPUTING, REDMULE_BUFFERING, REDMULE_STORING, REDMULE_FINISHED} redmule_ctrl_state; - redmule_ctrl_state current, next; + typedef enum logic [2:0] { + REDMULE_IDLE, + REDMULE_STARTING, + REDMULE_COMPUTING, + REDMULE_BUFFERING, + REDMULE_STORING, + REDMULE_FINISHED + } redmule_ctrl_state_e; + + redmule_ctrl_state_e current, next; hwpe_ctrl_package::ctrl_regfile_t reg_file_d, reg_file_q; hwpe_ctrl_package::ctrl_slave_t cntrl_slave; @@ -109,7 +117,7 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS if(~rst_ni) begin current <= REDMULE_IDLE; end else begin - if (clear) + if (clear) current <= REDMULE_IDLE; else current <= next; @@ -121,14 +129,14 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS if(~rst_ni) begin w_row_count_q <= '0; end else begin - if (clear) + if (clear) w_row_count_q <= '0; else w_row_count_q <= w_row_count_d; end end - always_ff @(posedge clk_i or negedge rst_ni) begin + always_ff @(posedge clk_i or negedge rst_ni) begin if(~rst_ni) begin count_w_q <= 1'b0; end else begin @@ -195,7 +203,7 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS end else begin if (last_w_row_rst || clear) last_w_row <= 1'b0; - else if (last_w_row_en) + else if (last_w_row_en) last_w_row <= 1'b1; end end @@ -219,7 +227,7 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS if(~rst_ni) begin z_storings_q <= '0; end else begin - if (clear || storing_rst) + if (clear || storing_rst) z_storings_q <= '0; else z_storings_q <= z_storings_d; @@ -249,7 +257,7 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS /*---------------------------------------------------------------------------------------------*/ /* Controller FSM */ /*---------------------------------------------------------------------------------------------*/ - // This is a local FSM who's only work is to make the first + // This is a local FSM who's only work is to make the first // input load operation and to start the redmule_scheduler always_comb begin : controller_fsm tiler_setback = 1'b0; @@ -290,10 +298,10 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS finish_d = 1'b0; next = REDMULE_STARTING; end - else + else next = REDMULE_IDLE; end - + REDMULE_STARTING: begin w_shift_o = 1'b0; cntrl_scheduler_o.first_load = 1'b1; @@ -307,7 +315,7 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS REDMULE_COMPUTING: begin if (w_loaded_i) w_row_count_d = w_row_count_q + 1; - + if (w_row_count_d == Height && !count_w_q) w_computed_en = 1'b1; else if (w_row_count_q == w_rows_iter) begin @@ -316,7 +324,7 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS if (!last_w_row) last_w_row_en = 1'b1; end - + case (last_w_row) 1'b0: begin if (w_computed == NumPipeRegs) begin @@ -343,7 +351,7 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS end endcase end - + REDMULE_BUFFERING: begin z_buffer_clk_en = 1'b1; if (last_w_row) @@ -358,10 +366,10 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS else next = REDMULE_BUFFERING; end - + REDMULE_STORING: begin cntrl_scheduler_o.storing = 1'b1; - + if (w_loaded_i) w_row_count_d = w_row_count_q + 1; @@ -377,7 +385,7 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS end end end - + REDMULE_FINISHED: begin cntrl_slave.done = 1'b1; busy_o = 1'b0; @@ -399,6 +407,6 @@ localparam int unsigned LEFT_PARAMS = LEFT_PARAMS /* Other combinational assigmnets */ /*---------------------------------------------------------------------------------------------*/ assign evt_o = flgs_slave.evt[7:0]; - assign clear_o = clear; + assign clear_o = clear; endmodule : redmule_ctrl diff --git a/rtl/redmule_engine.sv b/rtl/redmule_engine.sv index 440f90d..20963b7 100644 --- a/rtl/redmule_engine.sv +++ b/rtl/redmule_engine.sv @@ -10,27 +10,25 @@ module redmule_engine import redmule_pkg::*; #( parameter fp_format_e FpFormat = FP16 , - parameter int unsigned Height = 4 , // Number of PEs per row - parameter int unsigned Width = 8 , // Number of parallel index + parameter int unsigned Height = 4 , // Number of PEs per row + parameter int unsigned Width = 8 , // Number of parallel index parameter int unsigned NumPipeRegs = 3 , parameter pipe_config_t PipeConfig = DISTRIBUTED , parameter type TagType = logic , parameter type AuxType = logic , - localparam int unsigned BITW = fpnew_pkg::fp_width(FpFormat), // Number of bits for the given format + localparam int unsigned BITW = fpnew_pkg::fp_width(FpFormat), // Number of bits for the given format localparam int unsigned H = Height , localparam int unsigned W = Width , localparam int unsigned DELAY = NumPipeRegs+1 )( input logic clk_i , input logic rst_ni , - // Input Elements + // Input Elements input logic [W-1:0][H-1:0][BITW-1:0] x_input_i , // Inputs to be loaded inside the buffer - input logic [H-1:0][BITW-1:0] w_input_i ,// Weights to be streamed inside the datapath + input logic [H-1:0][BITW-1:0] w_input_i , // Weights to be streamed inside the datapath input logic [W-1:0] [BITW-1:0] y_bias_i , - // Output Result + // Output Result output logic [W-1:0] [BITW-1:0] z_output_o , // Outputs computations - // input cntrl_engine_t ctrl_i [W-1:0][H-1:0], - // output flgs_engine_t flags_o [W-1:0][H-1:0] // Control signal for successive accumulations input logic accumulate_i , // fpnew_fma Input Signals @@ -43,19 +41,19 @@ module redmule_engine input logic op_mod_i , input TagType tag_i , input AuxType aux_i , - // fpnew_fma Input Handshake + // fpnew_fma Input Handshake input logic in_valid_i , output logic [W-1:0][H-1:0] in_ready_o , input logic reg_enable_i , input logic flush_i , - // fpnew_fma Output signals + // fpnew_fma Output signals output fpnew_pkg::status_t [W-1:0][H-1:0] status_o , output logic [W-1:0][H-1:0] extension_bit_o , output fpnew_pkg::classmask_e [W-1:0][H-1:0] class_mask_o , output logic [W-1:0][H-1:0] is_class_o , output TagType [W-1:0][H-1:0] tag_o , output AuxType [W-1:0][H-1:0] aux_o , - // fpnew_fma Output handshake + // fpnew_fma Output handshake output logic [W-1:0][H-1:0] out_valid_o , input logic out_ready_i , // fpnew_fma Indication of valid data in flight @@ -73,7 +71,7 @@ logic [W-1:0] row_clk; logic [W-1:0] [BITW-1:0] result, feedback; generate - for (genvar index = 0; index < W; index++) begin + for (genvar index = 0; index < W; index++) begin: gen_redmule_rows /*--------------------------------------- Array ----------------------------------------*/ tc_clk_gating i_row_clk_gating ( .clk_i ( clk_i ), @@ -117,7 +115,7 @@ generate .out_ready_i ( out_ready_i ), .busy_o ( busy_o [index] ) ); - + // In case input matrix is bigger than the array, we feedback the partial results to continue the computation always_comb begin : partial_product_feedback feedback[index] = y_bias_i[index]; @@ -126,7 +124,7 @@ generate else feedback[index] = y_bias_i[index]; end - end + end endgenerate assign z_output_o = result; diff --git a/rtl/redmule_inst_decoder.sv b/rtl/redmule_inst_decoder.sv index fc8e2dd..d65e90e 100644 --- a/rtl/redmule_inst_decoder.sv +++ b/rtl/redmule_inst_decoder.sv @@ -5,7 +5,7 @@ // Yvan Tortorella // -module redmule_inst_decoder +module redmule_inst_decoder import redmule_pkg::*; import cv32e40x_pkg::*; #( @@ -46,8 +46,13 @@ logic cfg_ready; logic count_rst, count_update; logic [NumCfgRegs-1:0] reg_offs; -typedef enum logic [1:0] {Idle, WriteCfg, Trigger} redmule_instr_cfg_state; -redmule_instr_cfg_state current, next; +typedef enum logic [1:0] { + Idle, + WriteCfg, + Trigger +} redmule_instr_cfg_state_e; + +redmule_instr_cfg_state_e current, next; // Xif static binding assign xif_compressed_if_i.compressed_ready = 1'b0; @@ -174,7 +179,7 @@ always_comb begin : cfg_fsm if (cfg_ready) next = WriteCfg; end - + WriteCfg: begin cfg_req_o.req = 1'b1; cfg_req_o.wen = 1'b0; diff --git a/rtl/redmule_pkg.sv b/rtl/redmule_pkg.sv index b60aa0e..91b36a6 100644 --- a/rtl/redmule_pkg.sv +++ b/rtl/redmule_pkg.sv @@ -57,9 +57,9 @@ package redmule_pkg; parameter int unsigned X_ITERS = 3; // 0x0C --> [31:16] -> ROWS ITERATIONS, [15:0] -> COLUMNS ITERATIONS parameter int unsigned W_ITERS = 4; // 0x10 --> [31:16] -> ROWS ITERATIONS, [15:0] -> COLUMNS ITERATIONS // Number of rows and columns leftovers (8 bits for each) - // [31:24] -> X/Y ROWS LEFTOVERS + // [31:24] -> X/Y ROWS LEFTOVERS // [23:16] -> X COLUMNS LEFTOVERS - // [15:8] -> W ROWS LEFTOVERS + // [15:8] -> W ROWS LEFTOVERS // [7:0] -> W/Y COLUMNS LEFTOVERS parameter int unsigned LEFTOVERS = 5; // 0x14 // We keep a register for the remaining params @@ -199,7 +199,7 @@ package redmule_pkg; logic [ARRAY_WIDTH-1:0][ARRAY_HEIGHT-1:0] out_valid; logic [ARRAY_WIDTH-1:0][ARRAY_HEIGHT-1:0] busy; } flgs_engine_t; - + typedef struct packed { logic start_fsm; logic first_load; diff --git a/rtl/redmule_row.sv b/rtl/redmule_row.sv index c7a790b..6bfd0ca 100644 --- a/rtl/redmule_row.sv +++ b/rtl/redmule_row.sv @@ -23,9 +23,9 @@ module redmule_row input logic [H-1:0][BITW-1:0] x_input_i , input logic [H-1:0][BITW-1:0] w_input_i , input logic [BITW-1:0] y_bias_i , - // Output Result + // Output Result output logic [BITW-1:0] z_output_o , - // fpnew_fma Input Signals + // fpnew_fma Input Signals input logic [2:0] fma_is_boxed_i , input logic [1:0] noncomp_is_boxed_i, input fpnew_pkg::roundmode_e stage1_rnd_i , @@ -47,10 +47,10 @@ module redmule_row output logic [H-1:0] is_class_o , output TagType [H-1:0] tag_o , output AuxType [H-1:0] aux_o , - // fpnew_fma Output handshake + // fpnew_fma Output handshake output logic [H-1:0] out_valid_o , input logic out_ready_i , - // fpnew_fma Indication of valid data in flight + // fpnew_fma Indication of valid data in flight output logic [H-1:0] busy_o ); @@ -66,14 +66,14 @@ logic [H-1:0] [BITW-1:0] output_q; // Generate PEs generate - for (genvar index = 0; index < H; index++) begin : computing_element + for (genvar index = 0; index < H; index++) begin : gen_computing_element assign input_operands [index][0] = x_input_i [index]; assign input_operands [index][1] = w_input_i [index]; - if (index > 0) + if (index > 0) assign input_operands [index][2] = output_q [index-1]; else assign input_operands [index][2] = y_bias_i; - + redmule_ce #( .FpFormat ( FpFormat ), .NumPipeRegs ( NumPipeRegs ), @@ -109,7 +109,7 @@ generate .out_ready_i ( out_ready_i ), .busy_o ( busy_o [index] ) ); - end : computing_element + end endgenerate always_ff @(posedge clk_i or negedge rst_ni) begin : intermediate_output_register @@ -121,7 +121,7 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : intermediate_output_registe output_q [i] <= '0; else if (reg_enable_i) output_q [i] <= partial_result [i]; - else + else output_q [i] <= output_q [i]; end end diff --git a/rtl/redmule_scheduler.sv b/rtl/redmule_scheduler.sv index eea04c8..d12ea04 100644 --- a/rtl/redmule_scheduler.sv +++ b/rtl/redmule_scheduler.sv @@ -12,12 +12,12 @@ module redmule_scheduler import hwpe_ctrl_package::*; import hwpe_stream_package::*; #( -parameter int unsigned Height = ARRAY_HEIGHT, -parameter int unsigned Width = ARRAY_WIDTH , -parameter int unsigned NumPipeRegs = PIPE_REGS , -localparam int unsigned D = TOT_DEPTH , -localparam int unsigned H = Height , -localparam int unsigned W = Width + parameter int unsigned Height = ARRAY_HEIGHT, + parameter int unsigned Width = ARRAY_WIDTH , + parameter int unsigned NumPipeRegs = PIPE_REGS , + localparam int unsigned D = TOT_DEPTH , + localparam int unsigned H = Height , + localparam int unsigned W = Width )( /********************************************************/ /* Inputs */ @@ -169,8 +169,23 @@ fpnew_pkg::fp_format_e input_cast_src_fmt , localparam int unsigned JMP = NumByte*(DATA_W/MemDw - 1); localparam int unsigned NBYTES = BITW/8; -typedef enum logic [3:0] {ENGINE_IDLE, PRELOAD_Y, LOAD_Y, X_REQ, W_REQ, STORE_REQ, FIRST_LOAD, WAIT, - WAIT_ONE, WAIT_TWO, LOAD_X, LOAD_W, STORE, SKIP_W} redmule_fsm_state_e; +typedef enum logic [3:0] { + ENGINE_IDLE, + PRELOAD_Y, + LOAD_Y, + X_REQ, + W_REQ, + STORE_REQ, + FIRST_LOAD, + WAIT, + WAIT_ONE, + WAIT_TWO, + LOAD_X, + LOAD_W, + STORE, + SKIP_W +} redmule_fsm_state_e; + redmule_fsm_state_e current, next; always_comb begin : address_gen_signals @@ -203,7 +218,7 @@ always_comb begin : address_gen_signals cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d0_len = W; cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d0_stride = reg_file_i.hwpe_params[Z_D0_STRIDE]; cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d1_len = reg_file_i.hwpe_params[W_ITERS][15:0]; - cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d1_stride = JMP; + cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d1_stride = JMP; cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d2_stride = reg_file_i.hwpe_params[Z_D2_STRIDE]; cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.dim_enable_1h = 2'b11; // Here we initialize the streamer sink signals for @@ -226,7 +241,7 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : state_register if(~rst_ni) begin current <= ENGINE_IDLE; end else begin - if (clear_i || clear_regs || cntrl_scheduler_i.rst) + if (clear_i || clear_regs || cntrl_scheduler_i.rst) current <= ENGINE_IDLE; else current <= next; @@ -237,7 +252,7 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : tot_weights_loaded if(~rst_ni) begin tot_w_loaded_q <= '0; end else begin - if (clear_i || clear_regs) + if (clear_i || clear_regs) tot_w_loaded_q <= '0; else tot_w_loaded_q <= tot_w_loaded_d; @@ -248,7 +263,7 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : tot_x_loaded if(~rst_ni) begin tot_x_loaded_q <= '0; end else begin - if (clear_i || clear_regs) + if (clear_i || clear_regs) tot_x_loaded_q <= '0; else tot_x_loaded_q <= tot_x_loaded_d; @@ -259,7 +274,7 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : tot_y_loaded if(~rst_ni) begin tot_y_loaded_q <= '0; end else begin - if (clear_i || clear_regs) + if (clear_i || clear_regs) tot_y_loaded_q <= '0; else tot_y_loaded_q <= tot_y_loaded_d; @@ -270,7 +285,7 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : tot_z_stored if(~rst_ni) begin tot_z_stored_q <= '0; end else begin - if (clear_i || clear_regs) + if (clear_i || clear_regs) tot_z_stored_q <= '0; else tot_z_stored_q <= tot_z_stored_d; @@ -281,7 +296,7 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : w_load_counter if(~rst_ni) begin w_loaded_q <= '0; end else begin - if (clear_i || clear_regs) + if (clear_i || clear_regs) w_loaded_q <= '0; else w_loaded_q <= w_loaded_d; @@ -305,7 +320,7 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : transfer_counter if(~rst_ni) begin transfer_count_q <= '0; end else begin - if (clear_i || clear_regs || transfer_rst) + if (clear_i || clear_regs || transfer_rst) transfer_count_q <= '0; else transfer_count_q <= transfer_count_d; @@ -453,13 +468,13 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : y_rows_leftover_flag end assign y_rows_lftovr_o = y_rows_lftovr_q; -always_ff @(posedge clk_i or negedge rst_ni) begin +always_ff @(posedge clk_i or negedge rst_ni) begin if(~rst_ni) begin tot_x_read_q <= '0; end else begin if (clear_i || clear_regs) tot_x_read_q <= '0; - else + else tot_x_read_q <= tot_x_read_d; end end @@ -468,7 +483,7 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : wait_states_counter if(~rst_ni) begin n_waits_q <= '0; end else begin - if (clear_i || clear_regs) + if (clear_i || clear_regs) n_waits_q <= '0; else n_waits_q <= n_waits_d; @@ -494,7 +509,7 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : new_weight_reg if(~rst_ni) begin new_w_q <= '0; end else begin - if (clear_i || clear_regs) + if (clear_i || clear_regs) new_w_q <= '0; else new_w_q <= new_w_d; @@ -739,7 +754,7 @@ logic [H-1:0][$clog2(D)-1:0] shift_count_q; logic [$clog2(H)-1:0] counter_index; logic [H-1:0] en_w, w_rst; -always_ff @(posedge clk_i or negedge rst_ni) begin +always_ff @(posedge clk_i or negedge rst_ni) begin if(~rst_ni) begin en_w <= '0; end else begin @@ -862,7 +877,7 @@ assign x_rows_clk_gate_en = (reg_file_i.hwpe_params[X_ITERS][31:16] > 'd1) ? : 1'b1; // If M < L we need to clock gate the whole computation assign x_rows_lftovr_en = ((x_rows_iter_d == reg_file_i.hwpe_params[X_ITERS][31:16] - 1 && w_cols_d == '0) - || (reg_file_i.hwpe_params[X_ITERS][31:16] == 16'b1 && current == X_REQ)) + || (reg_file_i.hwpe_params[X_ITERS][31:16] == 'd1 && current == X_REQ)) && reg_file_i.hwpe_params[LEFTOVERS][31:24] != '0 && x_rows_lftovr_q == '0; @@ -916,7 +931,7 @@ cntrl_streamer_o.output_cast_dst_fmt = output_cast_dst_fmt; x_rows_rst = 1'b0; x_cols_rst = 1'b0; -// Other default signals +// Other default signals load_x_en = 1'b0; load_x_rst = 1'b0; load_y_en = 1'b0; @@ -1007,7 +1022,7 @@ clear_regs = 1'b0; if (reg_file_i.hwpe_params[OP_SELECTION][0]) begin next = PRELOAD_Y; z_buffer_clk_en = 1'b1; - if (reg_file_i.hwpe_params[X_ITERS][31:16] == 16'b1 && reg_file_i.hwpe_params[LEFTOVERS][31:24] != '0) + if (reg_file_i.hwpe_params[X_ITERS][31:16] == 'd1 && reg_file_i.hwpe_params[LEFTOVERS][31:24] != '0) y_rows_lftovr_en = 1'b1; end else next = X_REQ; @@ -1022,7 +1037,7 @@ clear_regs = 1'b0; if (y_fifo_valid_i && y_fifo_strb_i == '1 && !flgs_z_buffer_i.loaded) begin y_buffer_load_o = 1'b1; z_buffer_clk_en = 1'b1; - end + end if (flgs_z_buffer_i.loaded) begin next = X_REQ; y_preloaded_en = 1'b1; @@ -1057,7 +1072,7 @@ clear_regs = 1'b0; FIRST_LOAD: begin hold_en = 1'b1; flgs_scheduler_o.x_ready = 1'b1; - if (x_valid_i && x_strb_i == '1) begin + if (x_valid_i && x_strb_i == '1) begin cntrl_x_buffer_o.load = 1'b1; x_buffer_clk_en = 1'b1; end @@ -1082,14 +1097,14 @@ clear_regs = 1'b0; else next = W_REQ; end - 0: begin + 0: begin next = LOAD_W; end endcase if (!cntrl_scheduler_i.first_load) begin x_cols_offs_d = '0; x_rows_offs_d = x_rows_offs_q + reg_file_i.hwpe_params[X_ROWS_OFFS]; - end + end end SKIP_W: begin @@ -1182,7 +1197,7 @@ clear_regs = 1'b0; if (skip_w_q) skip_w_rst = 1'b1; - + if (w_valid_i == 1'b1 && w_strb_i == '1) begin w_loaded = 1'b1; count_w_cycles_en = (!count_w_cycles_q & x_preloaded_q) ? 1'b1 : 1'b0; @@ -1204,9 +1219,12 @@ clear_regs = 1'b0; if (loading_x_q) next = LOAD_X; - else if ( ((loading_y_q && !flgs_z_buffer_i.loaded) || - (y_preloaded_q && flgs_z_buffer_i.y_pushed && !(reg_file_i.hwpe_params[W_ITERS][15:0] == 16'b1 && reg_file_i.hwpe_params[X_ITERS][31:16] == 16'b1)) || //if only one iteration is needed to completely load Y, we skip the LOAD_Y state - (flgs_x_buffer_i.full && !y_loaded_q && y_fifo_valid_i)) && + else if ( ((loading_y_q && !flgs_z_buffer_i.loaded) || + (y_preloaded_q && flgs_z_buffer_i.y_pushed && + !(reg_file_i.hwpe_params[W_ITERS][15:0] == 'd1 && + // if only one iteration is needed to completely load Y, we skip the LOAD_Y state + reg_file_i.hwpe_params[X_ITERS][31:16] == 'd1)) || + (flgs_x_buffer_i.full && !y_loaded_q && y_fifo_valid_i)) && reg_file_i.hwpe_params[OP_SELECTION][0] ) begin next = LOAD_Y; @@ -1227,7 +1245,9 @@ clear_regs = 1'b0; next = STORE; n_waits_d = '0; store_count_d = '0; - if (reg_file_i.hwpe_params[X_ITERS][31:16] == 16'b1 && reg_file_i.hwpe_params[LEFTOVERS][31:24] != '0 && store_rows_lftovr_q == '0) + if (reg_file_i.hwpe_params[X_ITERS][31:16] == 'd1 && + reg_file_i.hwpe_params[LEFTOVERS][31:24] != '0 && + store_rows_lftovr_q == '0) store_rows_lftovr_en = 1'b1; end else begin next = WAIT; @@ -1257,7 +1277,7 @@ clear_regs = 1'b0; if (!shift_lock_q) shift_lock_en = 1'b1; - + if (flgs_x_buffer_i.full || tot_x_loaded_q == W) begin load_x_rst = 1'b1; end @@ -1267,7 +1287,7 @@ clear_regs = 1'b0; transfer_count_d = transfer_count_q + 1; tot_x_loaded_d = tot_x_loaded_q + 1; end - + if (tot_x_loaded_d == ((x_rows_lftovr_q == '0) ? W : x_rows_lftovr_q)) begin tot_x_loaded_d = '0; load_x_rst = 1'b1; @@ -1286,7 +1306,7 @@ clear_regs = 1'b0; n_waits_d = '0; end end - + if (flgs_streamer_i.w_stream_source_flags.ready_start) begin if (x_rows_iter_q == reg_file_i.hwpe_params[X_ITERS][31:16]) cntrl_streamer_o.w_stream_source_ctrl.req_start = 1'b0; @@ -1299,17 +1319,16 @@ clear_regs = 1'b0; n_waits_d = n_waits_q + 1; flgs_scheduler_o.y_ready = 1'b1; load_y_en = (loading_y_q) ? 1'b0 : 1'b1; - + if (!shift_lock_q) shift_lock_en = 1'b1; - + if (flgs_z_buffer_i.loaded || tot_y_loaded_q == W - 1) begin load_y_rst = (loading_y_q) ? 1'b1 : 1'b0; y_preloaded_rst = (y_preloaded_q) ? 1'b1 : 1'b0; y_cols_iter_d = y_cols_iter_q + 1; y_cols_lftovr_rst = (y_cols_lftovr_q != '0) ? 1'b1 : 1'b0; - if (y_cols_iter_q == reg_file_i.hwpe_params[W_ITERS][15:0] - 1) y_rows_iter_d = y_rows_iter_q + 1; end @@ -1335,8 +1354,8 @@ clear_regs = 1'b0; end else if (n_waits_q > 1 || flgs_z_buffer_i.loaded) begin next = LOAD_W; n_waits_d = '0; - end - end + end + end end WAIT: begin @@ -1361,7 +1380,7 @@ clear_regs = 1'b0; end n_waits_d = n_waits_q + 1; - + if (w_cols_q == reg_file_i.hwpe_params[W_ITERS][15:0]) begin if ( (reg_file_i.hwpe_params[LEFTOVERS][7:0] != '0) && (w_cols_lftovr == '0) ) w_cols_lftovr_en = 1'b1; @@ -1384,11 +1403,11 @@ clear_regs = 1'b0; if (x_cols_lftovr_q != '0 && (flgs_x_buffer_i.full) ) x_cols_lftovr_rst = 1'b1; - if (reg_file_i.hwpe_params[X_ITERS][15:0] > 16'd1) begin // X Matrix N dimension is larger than the number of elements we read through the streamer port + if (reg_file_i.hwpe_params[X_ITERS][15:0] > 'd1) begin // X Matrix N dimension is larger than the number of elements we read through the streamer port if (flgs_streamer_i.x_stream_source_flags.ready_start) begin - if (flgs_streamer_i.w_stream_source_flags.ready_start) + if (flgs_streamer_i.w_stream_source_flags.ready_start) cntrl_streamer_o.x_stream_source_ctrl.req_start = 1'b0; else begin @@ -1403,7 +1422,7 @@ clear_regs = 1'b0; if (x_cols_lftovr_q != '0 && !loading_x_q) x_cols_lftovr_rst = 1'b1; end else if (tot_store_q == reg_file_i.hwpe_params[LEFT_PARAMS][31:16] - 1) begin - if (x_cols_iter_q < reg_file_i.hwpe_params[X_ITERS][15:0] && tot_x_read_q < reg_file_i.hwpe_params[TOT_X_READ] - 1 || reg_file_i.hwpe_params[X_ITERS][31:16] == 16'b1) begin + if (x_cols_iter_q < reg_file_i.hwpe_params[X_ITERS][15:0] && tot_x_read_q < reg_file_i.hwpe_params[TOT_X_READ] - 1 || reg_file_i.hwpe_params[X_ITERS][31:16] == 'd1) begin cntrl_streamer_o.x_stream_source_ctrl.req_start = 1'b1; tot_x_read_d = tot_x_read_q + 1; end @@ -1441,8 +1460,10 @@ clear_regs = 1'b0; w_iters_d = '0; end end - end else begin // X Matrix N channel is equal or smaller than the number of elements we read from the streamer port - if (flgs_streamer_i.w_stream_source_flags.ready_start) + end else begin + // X Matrix N channel is equal or smaller than the number + // of elements we read from the streamer port + if (flgs_streamer_i.w_stream_source_flags.ready_start) cntrl_streamer_o.x_stream_source_ctrl.req_start = 1'b0; else begin @@ -1486,12 +1507,12 @@ clear_regs = 1'b0; if (y_cols_iter_q == reg_file_i.hwpe_params[W_ITERS][15:0]) y_cols_iter_d = 16'd0; end - + if (tot_w_loaded_q == reg_file_i.hwpe_params[W_ITERS][31:16] && !loading_x_q) begin tot_w_loaded_d = '0; w_iters_d = w_iters_q + 1; end - if (n_waits_q == (NumPipeRegs - 1) && !(flgs_streamer_i.w_stream_source_flags.ready_start & fifo_flgs_i.empty)) begin + if (n_waits_q == (NumPipeRegs - 1) && !(flgs_streamer_i.w_stream_source_flags.ready_start & fifo_flgs_i.empty)) begin n_waits_d = '0; next = LOAD_W; if (last_store) begin @@ -1502,7 +1523,7 @@ clear_regs = 1'b0; end_computation = 1'b1; if (cntrl_scheduler_i.storing) begin next = STORE; - if (reg_file_i.hwpe_params[X_ITERS][31:16] == 16'b1 && reg_file_i.hwpe_params[LEFTOVERS][31:24] != '0 && store_rows_lftovr_q == '0) + if (reg_file_i.hwpe_params[X_ITERS][31:16] == 'd1 && reg_file_i.hwpe_params[LEFTOVERS][31:24] != '0 && store_rows_lftovr_q == '0) store_rows_lftovr_en = 1'b1; end else next = WAIT; @@ -1512,14 +1533,13 @@ clear_regs = 1'b0; d_shift_d = '0; //This handles the case where the number of iterations on X rows is 2 but we have a leftover <= H - if (x_cols_iter_d == reg_file_i.hwpe_params[X_ITERS][15:0] - 1 && + if (x_cols_iter_d == reg_file_i.hwpe_params[X_ITERS][15:0] - 1 && !(x_rows_iter_d == '0 && w_iters_d == '0) && - reg_file_i.hwpe_params[X_ITERS][15:0] == 16'd2 && - reg_file_i.hwpe_params[LEFTOVERS][23:16] <= H && + reg_file_i.hwpe_params[X_ITERS][15:0] == 'd2 && + reg_file_i.hwpe_params[LEFTOVERS][23:16] <= H && reg_file_i.hwpe_params[LEFTOVERS][23:16] != '0 && tot_x_read_d != reg_file_i.hwpe_params[TOT_X_READ]) skip_w_en = 1'b1; - end end @@ -1558,7 +1578,7 @@ clear_regs = 1'b0; n_waits_d = (W == H*NumPipeRegs) ? (NumPipeRegs - 1) : n_waits_q; store_count_d = '0; tot_z_stored_d = '0; - + if (x_rows_lftovr_q != '0 && store_rows_lftovr_q == '0) store_rows_lftovr_en = 1'b1; diff --git a/rtl/redmule_streamer.sv b/rtl/redmule_streamer.sv index 61f5958..7058639 100644 --- a/rtl/redmule_streamer.sv +++ b/rtl/redmule_streamer.sv @@ -31,7 +31,7 @@ module redmule_streamer hwpe_stream_intf_stream.sink z_stream_i, // TCDM interface between the streamer and the memory hci_core_intf.master tcdm , - + // Control signals input cntrl_streamer_t ctrl_i, output flgs_streamer_t flags_o @@ -75,7 +75,7 @@ hci_core_intf #( .DW ( DW ), hci_core_sink #( .DATA_WIDTH ( DW ), .MISALIGNED_ACCESSES ( REALIGN ) -) i_stream_sink ( +) i_stream_sink ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .test_mode_i ( test_mode_i ), @@ -102,7 +102,7 @@ assign cast = (ctrl_i.input_cast_src_fmt == fpnew_pkg::FP16) ? 1'b0: 1'b1; redmule_castout #( .FpFmtConfig ( FpFmtConfig ), .IntFmtConfig ( IntFmtConfig ), - .src_format ( FPFORMAT ) + .SrcFormat ( FPFORMAT ) ) i_store_cast ( .clk_i , .rst_ni , @@ -217,7 +217,7 @@ for (genvar i = 0; i < NumStreamSources; i++) begin: gen_tcdm2stream redmule_castin #( .FpFmtConfig ( FpFmtConfig ), .IntFmtConfig ( IntFmtConfig ), - .dst_format ( FPFORMAT ) + .DstFormat ( FPFORMAT ) ) i_load_cast ( .clk_i , .rst_ni , @@ -256,7 +256,7 @@ for (genvar i = 0; i < NumStreamSources; i++) begin: gen_tcdm2stream .ctrl_i ( source_ctrl[i] ), .flags_o ( source_flags[i] ) ); - + end // Assign flags in the vector to the relative output buses. diff --git a/rtl/redmule_top.sv b/rtl/redmule_top.sv index e459d99..5f7797c 100644 --- a/rtl/redmule_top.sv +++ b/rtl/redmule_top.sv @@ -176,7 +176,7 @@ hwpe_stream_intf_stream #( .DATA_WIDTH ( DATAW_ALIGN ) ) z_buffer_fifo ( .clk( c // The streamer will present a single master TCDM port used to stream data to and from the memeory. redmule_streamer #( - .DW ( DW ) + .DW ( DW ) ) i_streamer ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), @@ -199,11 +199,11 @@ redmule_streamer #( hwpe_stream_fifo #( .DATA_WIDTH ( DATAW_ALIGN ), .FIFO_DEPTH ( 4 ) -) i_x_buffer_fifo ( +) i_x_buffer_fifo ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .clear_i ( clear ), - .flags_o ( ), + .flags_o ( ), .push_i ( x_buffer_d ), .pop_o ( x_buffer_fifo ) ); @@ -215,7 +215,7 @@ hwpe_stream_fifo #( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .clear_i ( clear ), - .flags_o ( w_fifo_flgs ), + .flags_o ( w_fifo_flgs ), .push_i ( w_buffer_d ), .pop_o ( w_buffer_fifo ) ); @@ -227,7 +227,7 @@ hwpe_stream_fifo #( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .clear_i ( clear ), - .flags_o ( ), + .flags_o ( ), .push_i ( y_buffer_d ), .pop_o ( y_buffer_fifo ) ); @@ -235,11 +235,11 @@ hwpe_stream_fifo #( hwpe_stream_fifo #( .DATA_WIDTH ( DATAW_ALIGN ), .FIFO_DEPTH ( 2 ) -) i_z_buffer_fifo ( +) i_z_buffer_fifo ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .clear_i ( clear ), - .flags_o ( ), + .flags_o ( ), .push_i ( z_buffer_q ), .pop_o ( z_buffer_fifo ) ); @@ -342,7 +342,7 @@ logic accumulate, engine_flush; logic [2:0] fma_is_boxed; logic [1:0] noncomp_is_boxed; roundmode_e stage1_rnd, - stage2_rnd; + stage2_rnd; operation_e op1, op2; logic op_mod; logic in_tag; @@ -359,7 +359,7 @@ classmask_e [Width-1:0][Height-1:0] class_mask; logic [Width-1:0][Height-1:0] is_class; logic [Width-1:0][Height-1:0] out_tag; logic [Width-1:0][Height-1:0] out_aux; -// fpnew_fma Output handshake +// fpnew_fma Output handshake logic [Width-1:0][Height-1:0] out_valid; logic out_ready; // fpnew_fma Indication of valid data in flight @@ -443,7 +443,7 @@ redmule_ctrl #( .Height ( Height ), .Width ( Width ), .NumPipeRegs ( NumPipeRegs ) -) i_control ( +) i_control ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .test_mode_i ( test_mode_i ), @@ -465,7 +465,7 @@ redmule_ctrl #( .cntrl_scheduler_o ( cntrl_scheduler ), .periph ( periph ) ); - + /*---------------------------------------------------------------*/ /* | Local FSM | */ @@ -475,7 +475,7 @@ redmule_scheduler #( .Height ( Height ), .Width ( Width ), .NumPipeRegs ( NumPipeRegs ) -) i_scheduler ( +) i_scheduler ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .test_mode_i ( test_mode_i ), diff --git a/rtl/redmule_w_buffer.sv b/rtl/redmule_w_buffer.sv index a72b6c9..8486af5 100644 --- a/rtl/redmule_w_buffer.sv +++ b/rtl/redmule_w_buffer.sv @@ -9,12 +9,12 @@ module redmule_w_buffer import fpnew_pkg::*; import redmule_pkg::*; #( -parameter int unsigned DW = 288 , -parameter fp_format_e FpFormat = FP16 , -parameter int unsigned Height = ARRAY_HEIGHT , // Number of PEs per row -localparam int unsigned BITW = fp_width(FpFormat), // Number of bits for the given format -localparam int unsigned H = Height , -localparam int unsigned D = DW/BITW + parameter int unsigned DW = 288 , + parameter fp_format_e FpFormat = FP16 , + parameter int unsigned Height = ARRAY_HEIGHT , // Number of PEs per row + localparam int unsigned BITW = fp_width(FpFormat), // Number of bits for the given format + localparam int unsigned H = Height , + localparam int unsigned D = DW/BITW )( input logic clk_i , input logic rst_ni , @@ -49,8 +49,8 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : w_trailer for (int d = 0; d < D; d++) w_buffer_q[h][d] <= (d < D - 1) ? w_buffer_q[h][d+1] : '0; end - end else - w_buffer_q <= w_buffer_q; + end else + w_buffer_q <= w_buffer_q; end end @@ -58,13 +58,13 @@ end always_ff @(posedge clk_i or negedge rst_ni) begin : row_load_counter if(~rst_ni) begin w_row <= '0; - end else begin + end else begin if (clear_i || w_row == H ) w_row <= '0; else if (ctrl_i.load) - w_row <= w_row + 1; + w_row <= w_row + 1; else - w_row <= w_row; + w_row <= w_row; end end diff --git a/rtl/redmule_wrap.sv b/rtl/redmule_wrap.sv index 2a226ce..7b4d2d2 100644 --- a/rtl/redmule_wrap.sv +++ b/rtl/redmule_wrap.sv @@ -132,7 +132,7 @@ logic [N_CORES-1:0][1:0] evt; end `else generate - for(genvar ii=0; ii