FpNew RTL fix

tinebp · tinebp · commit 4a11c1ec0f05 · 2024-06-14T16:29:52.000-07:00
diff --git a/README.md b/README.md
@@ -88,4 +88,9 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
 - Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder.
     ```sh
     $ ../configure
-    ```
+    ```
+- To debug the GPU, you can generate a "run.log" trace. See /docs/debugging.md for more information.
+    ```sh
+    $ ./ci/blackbox.sh --app=demo --debug=3
+    ```
+- For additional information, check out the /docs directory.
diff --git a/ci/regression.sh.in b/ci/regression.sh.in
@@ -78,7 +78,6 @@ isa()
 
     if [ "$XLEN" == "64" ]
     then
-
         make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
         make -C tests/riscv/isa run-rtlsim-64d
 
diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv
@@ -1,10 +1,10 @@
 // Copyright © 2019-2023
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 // http://www.apache.org/licenses/LICENSE-2.0
-// 
+//
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -15,12 +15,12 @@
 
 `ifdef FPU_FPNEW
 
-module VX_fpu_fpnew 
-    import VX_fpu_pkg::*; 
-    import fpnew_pkg::*; 
-    import cf_math_pkg::*; 
+module VX_fpu_fpnew
+    import VX_fpu_pkg::*;
+    import fpnew_pkg::*;
+    import cf_math_pkg::*;
     import defs_div_sqrt_mvp::*;
-#(      
+#(
     parameter NUM_LANES = 1,
     parameter TAG_WIDTH = 1,
     parameter OUT_BUF   = 0
@@ -34,15 +34,15 @@ module VX_fpu_fpnew
     input wire [NUM_LANES-1:0] mask_in,
 
     input wire [TAG_WIDTH-1:0] tag_in,
-    
+
     input wire [`INST_FPU_BITS-1:0] op_type,
     input wire [`INST_FMT_BITS-1:0] fmt,
     input wire [`INST_FRM_BITS-1:0] frm,
 
     input wire [NUM_LANES-1:0][`XLEN-1:0]  dataa,
     input wire [NUM_LANES-1:0][`XLEN-1:0]  datab,
     input wire [NUM_LANES-1:0][`XLEN-1:0]  datac,
-    output wire [NUM_LANES-1:0][`XLEN-1:0] result, 
+    output wire [NUM_LANES-1:0][`XLEN-1:0] result,
 
     output wire has_fflags,
     output wire [`FP_FLAGS_BITS-1:0] fflags,
@@ -51,32 +51,27 @@ module VX_fpu_fpnew
 
     input wire  ready_out,
     output wire valid_out
-);  
+);
     localparam LATENCY_FDIVSQRT = `MAX(`LATENCY_FDIV, `LATENCY_FSQRT);
     localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH;
 
-`ifdef XLEN_64
-    // use scalar configuration for mixed formats
     localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
         Width:         unsigned'(`XLEN),
         EnableVectors: 1'b0,
+    `ifdef XLEN_64
         EnableNanBox:  1'b1,
     `ifdef FLEN_64
         FpFmtMask:     5'b11000,
     `else
-        FpFmtMask:     5'b11000, // TODO: added FP64 to fix CVT bug in FpNew
+        FpFmtMask:     5'b11000, // TODO: adding FP64 to fix CVT bug in FpNew
     `endif
         IntFmtMask:    4'b0011
-    };
-`else
-    localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
-        Width:         unsigned'(`XLEN * NUM_LANES),
-        EnableVectors: 1'b1,
+    `else
         EnableNanBox:  1'b0,
         FpFmtMask:     5'b10000,
         IntFmtMask:    4'b0010
+    `endif
     };
-`endif
 
     localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
       PipeRegs:'{'{`LATENCY_FMA, 0, 0, 0, 0}, // ADDMUL
@@ -89,12 +84,12 @@ module VX_fpu_fpnew
                   '{default: fpnew_pkg::MERGED}}, // CONV
       PipeConfig: fpnew_pkg::DISTRIBUTED
     };
-    
-    wire fpu_ready_in, fpu_valid_in;    
+
+    wire fpu_ready_in, fpu_valid_in;
     wire fpu_ready_out, fpu_valid_out;
 
     reg [TAG_WIDTH-1:0] fpu_tag_in, fpu_tag_out;
-    
+
     reg [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands;
 
     wire [NUM_LANES-1:0][`XLEN-1:0] fpu_result;
@@ -111,12 +106,12 @@ module VX_fpu_fpnew
 
     always @(*) begin
         fpu_op          = 'x;
-        fpu_rnd         = frm;  
-        fpu_op_mod      = 0;        
+        fpu_rnd         = frm;
+        fpu_op_mod      = 0;
         fpu_has_fflags  = 1;
         fpu_operands[0] = dataa;
         fpu_operands[1] = datab;
-        fpu_operands[2] = datac;    
+        fpu_operands[2] = datac;
         fpu_dst_fmt     = fpnew_pkg::FP32;
         fpu_int_fmt     = fpnew_pkg::INT32;
 
@@ -133,24 +128,24 @@ module VX_fpu_fpnew
     `endif
 
         fpu_src_fmt = fpu_dst_fmt;
-        
+
         case (op_type)
             `INST_FPU_ADD: begin
                 fpu_op = fpnew_pkg::ADD;
                 fpu_operands[1] = dataa;
                 fpu_operands[2] = datab;
             end
-            `INST_FPU_SUB: begin 
-                fpu_op = fpnew_pkg::ADD; 
+            `INST_FPU_SUB: begin
+                fpu_op = fpnew_pkg::ADD;
                 fpu_operands[1] = dataa;
                 fpu_operands[2] = datab;
-                fpu_op_mod = 1; 
+                fpu_op_mod = 1;
             end
             `INST_FPU_MUL:   begin fpu_op = fpnew_pkg::MUL; end
             `INST_FPU_DIV:   begin fpu_op = fpnew_pkg::DIV; end
             `INST_FPU_SQRT:  begin fpu_op = fpnew_pkg::SQRT; end
             `INST_FPU_MADD:  begin fpu_op = fpnew_pkg::FMADD; end
-            `INST_FPU_MSUB:  begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end            
+            `INST_FPU_MSUB:  begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
             `INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
             `INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
         `ifdef FLEN_64
@@ -164,30 +159,18 @@ module VX_fpu_fpnew
             `INST_FPU_MISC:begin
                 case (frm)
                     0,1,2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = {1'b0, frm[1:0]}; fpu_has_fflags = 0; end // FSGNJ
-                    3:     begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end // CLASS                     
+                    3:     begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end // CLASS
                     4,5:   begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = 3'b011; fpu_op_mod = ~frm[0]; fpu_has_fflags = 0; end // FMV.X.W, FMV.W.X
                     6,7:   begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = {2'b00, frm[0]}; end // MIN, MAX
-                endcase    
+                endcase
             end
             default:;
         endcase
-
-    `ifdef FPU_RV64F
-        // apply nan-boxing to floating-point operands
-        for (integer i = 0; i < NUM_LANES; ++i) begin                    
-            if (op_type != `INST_FPU_I2F && op_type != `INST_FPU_U2F) begin
-                fpu_operands[0][i] |= 64'hffffffff00000000;
-            end
-            fpu_operands[1][i] |= 64'hffffffff00000000;
-            fpu_operands[2][i] |= 64'hffffffff00000000;        
-        end
-    `endif
     end
 
-`ifdef XLEN_64
     `UNUSED_VAR (mask_in)
     for (genvar i = 0; i < NUM_LANES; ++i) begin
-        wire [(TAG_WIDTH+1)-1:0] fpu_tag;        
+        wire [(TAG_WIDTH+1)-1:0] fpu_tag;
         wire fpu_valid_out_uq;
         wire fpu_ready_in_uq;
         fpnew_pkg::status_t fpu_status_uq;
@@ -196,10 +179,12 @@ module VX_fpu_fpnew
         `UNUSED_VAR (fpu_ready_in_uq)
         `UNUSED_VAR (fpu_status_uq)
 
-        fpnew_top #( 
+        fpnew_top #(
             .Features       (FPU_FEATURES),
             .Implementation (FPU_IMPLEMENTATION),
-            .TagType        (logic[(TAG_WIDTH+1)-1:0])
+            .TagType        (logic[(TAG_WIDTH+1)-1:0]),
+            .TrueSIMDClass  (1),
+            .EnableSIMDMask (1)
         ) fpnew_core (
             .clk_i          (clk),
             .rst_ni         (~reset),
@@ -210,9 +195,9 @@ module VX_fpu_fpnew
             .src_fmt_i      (fpu_src_fmt),
             .dst_fmt_i      (fpu_dst_fmt),
             .int_fmt_i      (fpu_int_fmt),
-            `UNUSED_PIN (vectorial_op_i),
-            `UNUSED_PIN (simd_mask_i),
-            .tag_i          ({fpu_tag_in, fpu_has_fflags}),            
+            .vectorial_op_i (1'b0),
+            .simd_mask_i    (mask_in[i]),
+            .tag_i          ({fpu_tag_in, fpu_has_fflags}),
             .in_valid_i     (fpu_valid_in),
             .in_ready_o     (fpu_ready_in_uq),
             .flush_i        (reset),
@@ -223,45 +208,14 @@ module VX_fpu_fpnew
             .out_ready_i    (fpu_ready_out),
             `UNUSED_PIN (busy_o)
         );
-        
+
         if (i == 0) begin
-            assign {fpu_tag_out, fpu_has_fflags_out} = fpu_tag;            
+            assign {fpu_tag_out, fpu_has_fflags_out} = fpu_tag;
             assign fpu_valid_out = fpu_valid_out_uq;
             assign fpu_ready_in = fpu_ready_in_uq;
             assign fpu_status = fpu_status_uq;
         end
     end
-`else
-    fpnew_top #( 
-        .Features       (FPU_FEATURES),
-        .Implementation (FPU_IMPLEMENTATION),
-        .TagType        (logic[(TAG_WIDTH+1)-1:0]),
-        .TrueSIMDClass  (1),
-        .EnableSIMDMask (1)
-    ) fpnew_core (
-        .clk_i          (clk),
-        .rst_ni         (~reset),
-        .operands_i     (fpu_operands),
-        .rnd_mode_i     (fpnew_pkg::roundmode_e'(fpu_rnd)),
-        .op_i           (fpu_op),
-        .op_mod_i       (fpu_op_mod),
-        .src_fmt_i      (fpu_src_fmt),
-        .dst_fmt_i      (fpu_dst_fmt),
-        .int_fmt_i      (fpu_int_fmt),
-        .vectorial_op_i (1'b1),
-        .simd_mask_i    (mask_in),
-        .tag_i          ({fpu_tag_in, fpu_has_fflags}),        
-        .in_valid_i     (fpu_valid_in),
-        .in_ready_o     (fpu_ready_in),
-        .flush_i        (reset),
-        .result_o       (fpu_result),
-        .status_o       (fpu_status),
-        .tag_o          ({fpu_tag_out, fpu_has_fflags_out}),
-        .out_valid_o    (fpu_valid_out),
-        .out_ready_i    (fpu_ready_out),
-        `UNUSED_PIN (busy_o)
-    );
-`endif
 
     assign fpu_valid_in = valid_in;
     assign ready_in = fpu_ready_in;