//====================================================================================
//  Copyright (C) BAY9, 2016
//====================================================================================
//
// MODULE:
//   twiddle
//
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

// ----------------------------------------------------------------------------------------
// twiddle - streaming complex multiplier with an internally generated coefficient
//
// Each accepted input word x = {xIm, xRe} (two signed w-bit halves) is multiplied by a
// complex coefficient (twRe + j*twIm) and emitted as y = {yIm, yRe}:
//     yRe ~ twRe*xRe - twIm*xIm
//     yIm ~ twRe*xIm + twIm*xRe
// The coefficient is taken from a 9-entry table (parameters tw0..tw8) that is addressed
// by a 6-bit running address (lutAdr). The low 4 bits of lutAdr are folded onto table
// entries 0..8, and lutAdr[5:3] selects sign changes and a real/imag swap.
//
// Parameters:
//   y_w, x_w  - widths of the y and x ports. The code slices x[2*w-1:0] and drives
//               y with 2*w bits, so presumably x_w = y_w = 2*w - TODO confirm.
//   run1_w    - width of the run1 control input (bits 1 and 0 are used).
//   w         - width of each real/imag data sample.
//   wt        - width of each signed coefficient; table entries carry two (wt-1)-bit
//               unsigned magnitudes.
//   tw0..tw8  - table contents: bits [wt-2:0] are the real magnitude, bits
//               [2*(wt-1)-1:wt-1] the imaginary magnitude.
//
// Ports:
//   x, x_ir, x_fe, x_re - input data and handshake; x_re = x_ir & ~x_fe.
//   y, y_or, y_ff, y_we - output data and handshake; y_we = y_or & ~y_ff.
//   run1                - run1[1] = run enable, run1[0] = mode (0 = FFT, 1 = IFFT).
//   clk1                - the only clock used by this module.
//   clk2, reset, regBus, regWe, regRe - not referenced by any logic in this module.
//   regWeOut, regReOut  - tied to 0.
// ----------------------------------------------------------------------------------------
module twiddle(y, y_or, y_ff, y_we, 
               x, x_ir, x_fe, x_re, 
               run1, 
               regBus, regWe, regRe, regWeOut, regReOut, clk1, clk2, reset);

  // --------------------------------------------------------------------------------------
  // Module parameters
  // All defaults are 0, so every parameter must be overridden at instantiation for the
  // vector ranges below to be meaningful.
  parameter y_w               = 0;
  parameter x_w               = 0;

  parameter run1_w            = 0;

  parameter w                 = 0;   // Data sample width (real or imag part)
  parameter wt                = 0;   // Coefficient width
  parameter tw0               = 0;   // Coefficient table entries 0..8
  parameter tw1               = 0;
  parameter tw2               = 0;
  parameter tw3               = 0;
  parameter tw4               = 0;
  parameter tw5               = 0;
  parameter tw6               = 0;
  parameter tw7               = 0;
  parameter tw8               = 0;

  // --------------------------------------------------------------------------------------
  // Inputs and outputs
  // Output side: y_we is asserted when the output is ready (y_or) and y_ff is low.
  // NOTE(review): y_ff presumably means "sink FIFO full" - confirm against the caller.
  output            [y_w-1:0]  y;
  output                       y_or;
  input                        y_ff;
  output                       y_we;
  assign                       y_we = y_or & ~y_ff;

  // Input side: x_re is asserted when this module is ready (x_ir) and x_fe is low.
  // NOTE(review): x_fe presumably means "source FIFO empty" - confirm against the caller.
  input             [x_w-1:0]  x;
  output                       x_ir;
  input                        x_fe;
  output                       x_re;
  assign                       x_re = x_ir & ~x_fe;

  input          [run1_w-1:0]  run1;

  // Define clock and reset
  // Only clk1 is used below; clk2 and reset are not referenced in this module.
  input                        clk1;
  input                        clk2;
  input                        reset;

  // Inputs and outputs for registers
  // regBus, regWe and regRe are not referenced in this module.
  inout                [15:0]  regBus;
  input                        regWe;
  input                        regRe;
  output                       regWeOut;
  output                       regReOut;

  // Assign clock
  wire clk;
  assign clk = clk1;

  // --------------------------------------------------------------------------------------
  // External status registers
  // --------------------------------------------------------------------------------------
  // Define register control output
  // This module implements no registers, so both control outputs are held at 0.
  assign regWeOut = 0;
  assign regReOut = 0;

// ----------------------------------------------------------------------------------------
//=========================================================================================

//-----------------------------------------------------------------------------------------
  // Counter variable
  // NOTE(review): k is declared but not referenced anywhere in this module.
  integer k;
  
  // Define wires/registers
  wire                     twRun;              // 0/1 = off/on
  wire                     twMode;             // 0 = FFT, 1=IFFT
  
  // Define LUT including aRe+aIm, aRe-aIm processing
  // NOTE(review): lutAdrD, twReP, twImP, twReM and twImM are declared but never
  //               assigned or read in this module (lutAdrD appears only in
  //               commented-out code).
  reg                [5:0] lutAdr, lutAdrNxt, lutAdrD;
  reg                [3:0] lutAdr8;
  reg                [5:0] cnt, cntNxt;
  reg                      x_reD, x_reDD;                 // Delayed read enable
  reg signed      [wt-1:0] twReP, twImP, twReM, twImM;   // Normal and negated twiddle factors
  reg signed      [wt-1:0] twRe, twIm;                   // Final twiddle factors
  reg signed      [wt-1:0] aP, a0, aM;                   // Modified twiddle coefficients
  reg signed    [w+wt-2:0] mP, mM;                       // Long bit width mul outputs
  reg signed    [w+wt-3:0] m0;                           // Long bit width mul outputs
  reg signed       [w-1:0] xRe, xIm, xReD, xImD, xSum;   // Normal/delayed input signals and sum
  reg signed         [w:0] xSumS1;                       // Intermediate sum result
  reg signed       [w+2:0] mOutP, mOut0, mOutM;          // Multiplication outputs
  reg signed       [w-1:0] yRe, yIm;                     // Final outputs
  
  reg signed      [wt-1:0] twRe0, twIm0, twRe1, twIm1;   // Magnitudes / sign-adjusted values
  reg                [2:0] lutAdrHiP1;                   // lutAdr[5:3] + 1 (3-bit, wraps)
  
  reg                      twModeR, twRunR;              // Registered mode / run flags
  reg           [2*wt-1:0] twLut;                        // Selected table entry
  
  // Assign mode
  assign twRun  = run1[1];
  assign twMode = run1[0];
  
  // Input/output control - output is assumed to be always writable
  // (x_reDD does not depend on y_ff, so a word is not held when y_ff is high.)
  assign x_ir = twRunR;     // Input is ready whenever the registered run flag is set
  assign y_or = x_reDD;     // Output is ready 2 cycles after input
  assign y    = {yIm, yRe}; // Combine real/imag for output
  
  // ------------------------------------------------------------------------
  // LUT address counter, LUT address input calculation - combinatorial logic
  always @(*) begin
    
    // Simply count 0:1:63, or 0:-1:-63...
    // Mode 0 counts down, mode 1 counts up; cnt is 6 bits and wraps modulo 64.
    if (twModeR==0) begin
      cntNxt = cnt-1;
    end else begin
      cntNxt = cnt+1;
    end
    
    // Set LUT-64 input address
    // The address restarts at 0 whenever the low 3 bits of the next count are 0.
    // Otherwise it advances by a step derived from cntNxt[5:3]:
    //   mode 1: step = cntNxt[5:3]
    //   mode 0: step = {3'b111, cntNxt[5:3]} + 1, i.e. cntNxt[5:3] - 7 modulo 64
    if (cntNxt[2:0]==0) begin // Count 0,0,0,... 0,1,2,...,7, 0,2,4,...14, ...
      lutAdrNxt = 0;
    end else begin
      lutAdrNxt = lutAdr + {{3{~twModeR}}, cntNxt[5:3]} + {5'b0, ~twModeR};
    end
    
    // Set LUT-8 input address
    // Fold the low 4 address bits onto 0..8: values 0..7 pass through, values 8..15
    // are negated in 4 bits and become 8,7,...,1.
    if (lutAdr[3]==0) begin   // Count 0,1,..,7,8,7,6,..1,0,1,...
      lutAdr8 = lutAdr[3:0];  // for the LUT address input
    end else begin
      lutAdr8 = -lutAdr[3:0];
    end

  end

  // ------------------------------------------------------------------------
  // LUT address counter - FSM
  // cnt and lutAdr are cleared while the module is not running and advance only on
  // cycles where an input word is actually read (x_re).
  always @(posedge clk) begin
    if (twRunR==0) begin
      cnt    <= 0;
      lutAdr <= 0;
    end else begin
      if (x_re) begin           // Run FSM only if input is available
        cnt     <= cntNxt;
        lutAdr  <= lutAdrNxt;
      end
    end
    // These two flops have no reset/clear term; they simply follow x_re.
    x_reD  <= x_re;         // Delayed read enable for output enable
    x_reDD <= x_reD;      
  end
  
  // ------------------------------------------------------------------------
  // Use handcoded LUT, simpler than LUT module
  // lutAdr8 only takes the values 0..8 (see folding above); the default branch covers
  // the remaining 4-bit codes.
  always @(*) begin
    case (lutAdr8)
      0: twLut = tw0;
      1: twLut = tw1;
      2: twLut = tw2;
      3: twLut = tw3;
      4: twLut = tw4;
      5: twLut = tw5;
      6: twLut = tw6;
      7: twLut = tw7;
      8: twLut = tw8;
      default: twLut = tw8;
    endcase
  end
  
  // ------------------------------------------------------------------------
  // LUT output processing
  // Turns the unsigned table magnitudes into the signed coefficient (twRe, twIm),
  // using lutAdr[5:3] to choose signs and whether real and imag are swapped.
  always @(*) begin
  
    // Split output into real/imag, positive and negative outputs
//    twRe0 = {1'b0, lut_twiddle_dat[w-2:0]};
//    twIm0 = {1'b0, lut_twiddle_dat[2*(w-1)-1:w-1]};

    // Each half is (wt-1) bits; a 0 is prepended so the signed value is non-negative.
    // The top two bits of twLut are not used.
    twRe0 = {1'b0, twLut[wt-2:0]};
    twIm0 = {1'b0, twLut[2*(wt-1)-1:wt-1]};

    // This calculation simplifies subsequent selection
    // (3-bit result: lutAdr[5:3] = 7 wraps to 0)
    lutAdrHiP1 = lutAdr[5:3]+1;

    // Real part keeps its sign when bit 2 of lutAdrHiP1 is 0, otherwise it is negated.
//    if (lutAdrD[5:3]==0 | lutAdrD[5:3]==1 | lutAdrD[5:3]==2 | lutAdrD[5:3]==7)
    if (lutAdrHiP1[2]==0)
      twRe1 =  twRe0;
    else
      twRe1 = -twRe0;
    
    // Imag part keeps its sign when lutAdrHiP1 has odd parity, otherwise it is negated.
//    if (lutAdrD[5:3]==0 | lutAdrD[5:3]==1 | lutAdrD[5:3]==3 | lutAdrD[5:3]==6)
    if (^lutAdrHiP1)
      twIm1 =  twIm0;
    else
      twIm1 = -twIm0;

    // Real and imag are exchanged when bit 1 of lutAdrHiP1 is 1.
//    if (lutAdrD[5:3]==0 | lutAdrD[5:3]==3 | lutAdrD[5:3]==4 | lutAdrD[5:3]==7) begin
    if (lutAdrHiP1[1]==0) begin
      twRe = twRe1;
      twIm = twIm1;
    end else begin
      twRe = twIm1;
      twIm = twRe1;
    end
    
  end // LUT output processing

  // ------------------------------------------------------------------------
  // Multiplication - use only 3 multipliers
  // With aP = twRe+twIm, a0 = twIm, aM = twRe-twIm:
  //   aP*xRe - a0*(xRe+xIm) = twRe*xRe - twIm*xIm   (real part)
  //   aM*xIm + a0*(xRe+xIm) = twRe*xIm + twIm*xRe   (imag part)
  // The coefficients are registered here on the same clock edge that registers the
  // input samples below, so both arrive at the multipliers together.
  always @(posedge clk) begin    // Calculate multiplication factors
    aP <= twRe + twIm;
    a0 <= twIm;
    aM <= twRe - twIm;
  end
  
  always @(*) begin             // Select real/imag of input
    xRe     = x[w-1:0];         // -> sum with 1 bit more
    xIm     = x[2*w-1:w];       
    // Both operands are sign-extended by one bit so the sum cannot overflow.
    xSumS1  = {xRe[w-1], xRe} + {xIm[w-1], xIm};
  end
  
  always @(posedge clk) begin   // Calculate mul input, registered
    xReD <= xRe;
    xImD <= xIm;
    // xSum holds (xRe+xIm)/2; the missing factor of 2 is restored by shifting m0 one
    // bit less than mP/mM below.
    xSum <= xSumS1[w:1];        // >> 1 (use complicated way to avoid Quartus warning)
  end
  
  // Use a few bits more, add rounding after mul
  always @(*) begin
    mP = aP*xReD;  // Multiplication (after delay)
    m0 = a0*xSum;
    mM = aM*xImD;
    // Shifting: mOut0 would need 0.5 bit more, but it does not matter because
    //           these bits are cut anyway during final addition
    // Each slice below is w+3 bits wide, matching mOutP/mOut0/mOutM.
    mOutP = mP[w+wt-2:wt-4];  // mOutP = mP >>> (wt-4)
    mOut0 = m0[w+wt-3:wt-5];  // mOut0 = m0 >>> (wt-5)
    mOutM = mM[w+wt-2:wt-4];  // mOutM = mM >>> (wt-4)
  end // always @(*) begin
    
  // ------------------------------------------------------------------------
  // Adding 4 before the arithmetic shift by 3 rounds away the 3 extra low bits kept
  // in mOut*. The result is then truncated to the w-bit yRe/yIm registers.
  always @(posedge clk) begin   // Final addition (registered)
    yRe <= (mOutP - mOut0 + 4) >>> 3;
    yIm <= (mOutM + mOut0 + 4) >>> 3;
  end                                                                                    

  // ------------------------------------------------------------------------
  // Control flag registers: the run flag is sampled every clock; the mode flag is
  // only updated while cnt is 0, so the count direction cannot change mid-sequence.
  always @(posedge clk) begin
    twRunR <= twRun;
    if (cnt==0) begin
      twModeR <= twMode;
    end
  end
  
endmodule
//=========================================================================================

