//====================================================================================
//  Copyright (C) BAY9, 2016
//====================================================================================
//
// MODULE:
//   ctfeMulSum
//
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

// ctfeMulSum
//
// Time-multiplexed complex multiply/accumulate with a second summation stage.
//
// Data path, as implemented below:
//   * Input words x are shifted into the delay line xD on every x_re.
//     Each xD word carries a real part in bits [wx-1:0] and an imaginary part
//     in bits [2*wx-1:wx].
//   * One shared multiplier is used over four phases selected by cntIn[1:0]:
//       1: xRe0 *  xRe16      2: xIm0 *  xIm16     -> accumulated into accRe
//       3: xRe0 *  xIm16      0: xIm0 * -xRe16     -> accumulated into accIm
//     i.e. the real/imaginary parts of conj(xD[0]) * xD[16].
//   * The first-stage accumulation restarts (addIn0 = 0) at cntIn==1 for the
//     real part and at cntIn==3 for the imaginary part, i.e. once per wrap of
//     the 5 bit counter cntIn.
//   * First-stage results are written to the delay chain x8D; the second adder
//     stage sums x8D taps into acc8Re / acc8Im under control of cnt8In.
//   * y is always driven from acc8Re[2*wx+4:sh]; y_or is raised for
//     cnt8In==13 and cnt8In==15.
//
// Handshake:
//   y_we = y_or & ~y_ff    (write strobe derived from y_or and the y_ff input)
//   x_re = x_ir & ~x_fe    (read strobe derived from x_ir and the x_fe input)
//
// Control:
//   run1==0 synchronously clears xD, x8D, cntIn, cnt8In and addInLast.
//   accRe/accIm/acc8Re/acc8Im are not cleared; they are overwritten with a
//   sum that has a zero operand at the start of each accumulation.
//
// Notes:
//   * clk2, reset, regBus, regWe and regRe are declared but not used in this
//     module; regWeOut and regReOut are tied to 0.
//   * xD[16] and x8D[5] are referenced directly, so xD_n >= 17 and
//     x8D_n >= 6 are required.
//   * NOTE(review): x8D words are assigned from/to 2*wx+3 bit signals, so
//     x8D_w is presumably 2*wx+3 -- confirm against the instantiation.
//   * NOTE(review): run1 is compared with 0 for reset but used bitwise in
//     x_ir and in the cnt8In enable; these agree only if run1_w is 1 --
//     confirm against the instantiation.
module ctfeMulSum(y, y_or, y_ff, y_we,
                  x, x_ir, x_fe, x_re,
                  run1,
                  regBus, regWe, regRe, regWeOut, regReOut, clk1, clk2, reset);

  // --------------------------------------------------------------------------------------
  // Module parameters (all default to 0 and are expected to be overridden)
  parameter y_w               = 0;   // width of output y
  parameter x_w               = 0;   // width of input x

  parameter run1_w            = 0;   // width of run1

  parameter wx                = 0;   // width of one real/imag component inside an xD word
  parameter wy                = 0;   // not referenced in this module
  parameter sh                = 0;   // lowest bit of acc8Re that is passed to y

  parameter xD_w              = 0;   // word width of input delay line xD
  parameter xD_n              = 0;   // number of words in xD
  parameter xD_m              = 0;   // not referenced in this module
  parameter x8D_w             = 0;   // word width of delay chain x8D
  parameter x8D_n             = 0;   // number of words in x8D
  parameter x8D_m             = 0;   // not referenced in this module

  // --------------------------------------------------------------------------------------
  // Inputs and outputs
  output            [y_w-1:0]  y;
  output                       y_or;
  input                        y_ff;
  output                       y_we;
  assign                       y_we = y_or & ~y_ff;

  input             [x_w-1:0]  x;
  output                       x_ir;
  input                        x_fe;
  output                       x_re;
  assign                       x_re = x_ir & ~x_fe;

  input          [run1_w-1:0]  run1;

  // Define clock and reset (clk2 and reset are not used below)
  input                        clk1;
  input                        clk2;
  input                        reset;

  // Inputs and outputs for registers (regBus, regWe, regRe are not used below)
  inout                [15:0]  regBus;
  input                        regWe;
  input                        regRe;
  output                       regWeOut;
  output                       regReOut;

  // Assign clock: all sequential logic runs on clk1
  wire clk;
  assign clk = clk1;

  // --------------------------------------------------------------------------------------
  // External status registers
  // --------------------------------------------------------------------------------------
  // Internal status registers
  reg              [xD_w-1:0]  xD[xD_n-1:0];     // input delay line, new data enters at xD[xD_n-1]
  reg             [x8D_w-1:0]  x8D[x8D_n-1:0];   // delay chain feeding the second adder stage

  // --------------------------------------------------------------------------------------
  // Define register control output
  assign regWeOut = 0;
  assign regReOut = 0;

// ----------------------------------------------------------------------------------------
//=========================================================================================

  // --------------------------------------------------------------------------------------
  // Register definitions
  reg signed     [wx-1:0]  xRe0, xIm0, xRe16, xIm16, mulIn0, mulIn1;
  reg signed   [2*wx-2:0]  mulOut;
  reg signed   [2*wx+2:0]  addOut, addIn0, accRe, accIm, add8In0P;
  reg signed   [2*wx+4:0]  acc8Re, acc8Im, add8In0, add8In1, add8Out;
  reg               [4:0]  cntIn;      // input phase counter, low 2 bits select the multiplier phase
  reg               [3:0]  cnt8In;     // sequencer for the second adder stage and the output
  reg                      addInLast;  // set for one cycle after cntIn[1:0]==3

  integer k;

  // --------------------------------------------------------------------------------------
  // Assign input and output enable
  assign x_ir = run1 & cntIn[1:0]==0;
  assign y_or = cnt8In==13 | cnt8In==15;
  assign y    = acc8Re[2*wx+4:sh];

  // --------------------------------------------------------------------------------------
  // Signal processing behaviour (purely combinational)
  // The explicit sensitivity list is kept as in the original; the always @(*)
  // form is left commented out.
  //always @(*) begin
  always @(cntIn or cnt8In or accRe or accIm or
           xD[ 0] or xD[ 1] or xD[ 2] or xD[ 3] or xD[ 4] or
           xD[ 5] or xD[ 6] or xD[ 7] or xD[ 8] or xD[ 9] or
           xD[10] or xD[11] or xD[12] or xD[13] or xD[14] or
           xD[15] or xD[16] or acc8Re or acc8Im or
           x8D[0] or x8D[1] or x8D[2] or x8D[3] or x8D[4] or x8D[5]) begin

    // Get inputs: split delay line taps 0 and 16 into real/imag components
    xRe0  = xD[ 0][  wx-1: 0];
    xIm0  = xD[ 0][2*wx-1:wx];
    xRe16 = xD[16][  wx-1: 0];
    xIm16 = xD[16][2*wx-1:wx];

    // Get multiplier inputs, one partial product per phase of cntIn[1:0]
    if (cntIn[1:0]==1) begin
      mulIn0 = xRe0;
      mulIn1 = xRe16;
    end else
    if (cntIn[1:0]==2) begin
      mulIn0 = xIm0;
      mulIn1 = xIm16;
    end else
    if (cntIn[1:0]==3) begin
      mulIn0 = xRe0;
      mulIn1 = xIm16;
    end else
    begin
      // Phase 0: negated operand gives the "- xIm0*xRe16" term of the imag part
      mulIn0 = xIm0;
      mulIn1 = -xRe16;
    end

    // Multiplication
    mulOut = mulIn0 * mulIn1;

    // Summation, use sign extended multiplication output.
    // cntIn==1 / cntIn==3 start a new real / imag accumulation from zero.
    if (cntIn==1 | cntIn==3) begin
      addIn0 = 0;
    end else
    if (cntIn[1:0]==1 | cntIn[1:0]==2) begin
      addIn0 = accRe;
    end else begin
      addIn0 = accIm;
    end
    addOut = addIn0 + {{4{mulOut[2*wx-2]}}, mulOut};

    // Output addition and delay chain: select the x8D tap by cnt8In[3:1]
    case (cnt8In[3:1])
            0: add8In0P = x8D[0];
            1: add8In0P = x8D[1];
            2: add8In0P = x8D[2];
            3: add8In0P = x8D[3];
            4: add8In0P = x8D[4];
            5: add8In0P = x8D[5];
            6: add8In0P = x8D[5];
            7: add8In0P = x8D[5];
    endcase
    // Sign extend the selected tap to the width of the second adder stage
    add8In0 = {{2{add8In0P[2*wx+2]}}, add8In0P};

    // Second operand: zero for the first two taps, afterwards alternating
    // between the real and the imag accumulator
    case (cnt8In[3:1])
            0: add8In1 = 0;
            1: add8In1 = 0;
            2: add8In1 = acc8Re;
            3: add8In1 = acc8Im;
            4: add8In1 = acc8Re;
            5: add8In1 = acc8Im;
            6: add8In1 = acc8Re;
            7: add8In1 = acc8Im;
    endcase
    add8Out = add8In0 + add8In1;

  end

  // --------------------------------------------------------------------------------------
  // FSM
  always @(posedge clk) begin

    // Reset (synchronous, active while run1 is 0)
    if (run1==0) begin
      for (k=0; k<xD_n; k=k+1) begin
        xD[k] <= 0;
      end
      for (k=0; k<x8D_n; k=k+1) begin
        x8D[k] <= 0;
      end
      cntIn     <= 0;
      cnt8In    <= 0;
      addInLast <= 0;

    // Normal operation
    end else begin

      // Input shift register chain
      if (x_re) begin
        for (k=0; k<xD_n-1; k=k+1) begin
          xD[k] <= xD[k+1];
        end
        xD[xD_n-1] <= x;
      end

      // Input counter + signal for last input addition + start signal for output addition
      // The counter waits in phase 0 until an input word is actually read.
      if (x_re | cntIn[1:0]!=0) begin
        cntIn <= cntIn+1;
      end
      addInLast <= cntIn[1:0]==3;

      // Update of first adder stage.
      // Phase 0 only accumulates in the cycle directly following phase 3
      // (addInLast), so a stalled phase 0 does not add the product repeatedly.
      if (cntIn[1:0]!=0 | addInLast) begin
        if (cntIn[1:0]==1 | cntIn[1:0]==2) begin
          accRe <= addOut;
        end else begin
          accIm <= addOut;
        end
      end

      // Output summation counter, start immediately, stop at 11, cont if cntIn=30
      if (run1 & (cnt8In!=11 | cntIn==30) & (cnt8In!=0 | x_re)) begin
        cnt8In <= cnt8In+1;
      end

      // Output shift register, real/imag alternating
      if (cnt8In==10 | cnt8In==12) begin
        for (k=0; k<x8D_n-1; k=k+1) begin
          x8D[k] <= x8D[k+1];
        end
      end
      if (cntIn==30 | cnt8In==13) begin
        x8D[x8D_n-1] <= addOut;
      end

      // Update 2nd adder stage, the last imag output goes to accRe
      // in order to provide output interleaving of real/imag.
      // cnt8In==14 is therefore handled by the acc8Re branch only; the
      // redundant (unreachable) cnt8In==14 term of the acc8Im branch was removed.
      if (cnt8In==0 | cnt8In==4 | cnt8In==8 | cnt8In==12 | cnt8In==14) begin
        acc8Re <= add8Out;
      end else
      if (cnt8In==2 | cnt8In==6 | cnt8In==10) begin
        acc8Im <= add8Out;
      end

    end // if (run1==0) ... else
  end // always

endmodule
//=========================================================================================

