//====================================================================================
//  Copyright (C) BAY9, 2016
//====================================================================================
//
// MODULE:
//   fft8
//
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

// ----------------------------------------------------------------------------------------
// fft8 - streaming 8-point FFT/IFFT processor
//
// Data flow (all registers clocked by clk1):
//   1. Input FSM      : collects complex words from x into the shift register sIn.
//   2. Processing FSM : when the 8th word is read, loads fIn[] (bit-reversed order,
//                       sign-extended from w to wi bits per component) and steps
//                       cntProc through 0..7 while the combinatorial processor
//                       works on four complex values per step.
//   3. Output FSM     : at cntProc==7 captures psOut[] into sOut[] and shifts one
//                       word per clock through the saturation stage to y.
//
// Complex word layout used throughout: {imag, real}.
//
// Parameters (defaults are 0, so they must be overridden by the instantiation):
//   y_w, x_w : bus widths of y and x.
//              NOTE(review): the code indexes x as x[2*w-1:0] and re-declares y as
//              [2*w-1:0]; this presumably requires x_w == y_w == 2*w - confirm.
//   run1_w   : width of run1; bits 0 and 1 are used.
//   w        : bits per real/imag component at the external interface.
//   wi       : bits per real/imag component inside the processor.
//              NOTE(review): the sign extension {(wi-w){...}} assumes wi >= w.
//
// Ports:
//   y, y_or, y_ff, y_we : output data, output-ready flag, "full" flag from the sink,
//                         and write strobe (y_we = y_or & ~y_ff).
//   x, x_ir, x_fe, x_re : input data, input-ready flag, "empty" flag from the source,
//                         and read strobe (x_re = x_ir & ~x_fe).
//   run1                : bit 0 = mode (0 = FFT, 1 = IFFT), bit 1 = run (0 = off, 1 = on).
//   regBus, regWe, regRe: register bus interface; not used by this module.
//   regWeOut, regReOut  : tied to 0.
//   clk1                : the only clock used.
//   clk2, reset         : declared but not used; all FSMs are cleared synchronously
//                         while run1[1] == 0 instead.
// ----------------------------------------------------------------------------------------
module fft8(y, y_or, y_ff, y_we,
            x, x_ir, x_fe, x_re,
            run1,
            regBus, regWe, regRe, regWeOut, regReOut, clk1, clk2, reset);

  // --------------------------------------------------------------------------------------
  // Module parameters
  parameter y_w               = 0;
  parameter x_w               = 0;

  parameter run1_w            = 0;

  parameter w                 = 0;
  parameter wi                = 0;

  // --------------------------------------------------------------------------------------
  // Inputs and outputs
  output            [y_w-1:0]  y;
  output                       y_or;
  input                        y_ff;
  output                       y_we;
  assign                       y_we = y_or & ~y_ff;   // Write only if sink is not full

  input             [x_w-1:0]  x;
  output                       x_ir;
  input                        x_fe;
  output                       x_re;
  assign                       x_re = x_ir & ~x_fe;   // Read only if source is not empty

  input          [run1_w-1:0]  run1;

  // Define clock and reset
  input                        clk1;
  input                        clk2;    // Unused in this module
  input                        reset;   // Unused in this module

  // Inputs and outputs for registers
  inout                [15:0]  regBus;  // Unused in this module
  input                        regWe;   // Unused in this module
  input                        regRe;   // Unused in this module
  output                       regWeOut;
  output                       regReOut;

  // Assign clock
  wire clk;
  assign clk = clk1;

  // --------------------------------------------------------------------------------------
  // External status registers
  // --------------------------------------------------------------------------------------
  // Define register control output (this module implements no registers)
  assign regWeOut = 0;
  assign regReOut = 0;

// ----------------------------------------------------------------------------------------
//=========================================================================================

//-----------------------------------------------------------------------------------------
  // Loop counters, one per clocked process so that no variable is shared
  // between concurrently running always blocks
  integer kIn;                                  // Input FSM
  integer kProc;                                // Processing FSM
  integer kOut;                                 // Output FSM

  // Define wires/registers
  wire                     fftRun;              // 0/1 = off/on
  wire                     fftMode;             // 0 = FFT, 1=IFFT
  reg                [2:0] cntIn;               // Input counter
  reg                      fsmOut, fsmOutD;     // Output state machine, and delayed
  reg                [2:0] cntOut;              // Output counter
  reg                      fsmProc;             // Processing state machine
  reg                [2:0] cntProc;             // Processing counter
  reg            [2*w-1:0] sIn[6:0];            // Input shift register
  reg           [2*wi-1:0] sOut[7:0];           // Output shift register
  reg           [2*wi-1:0] fIn[7:0];            // FFT processor input registers
  reg           [2*wi-1:0] fOut[3:0];           // FFT processor direct output
  reg           [2*wi-1:0] fOutBuf[3:0];        // FFT processor output buffer registers
  reg signed      [wi-1:0] xRe[3:0], xIm[3:0];  // FFT processing function input
  reg signed      [wi-1:0] yRe[3:0], yIm[3:0];  // FFT processing function output
  reg signed      [wi-1:0] add0In0, add0In1, add0Out; // Add0 in/out
  reg signed      [wi-1:0] add1In0, add1In1, add1Out; // Add1 in/out
  reg signed      [wi-1:0] sub0In0, sub0In1, sub0Out; // Sub0 in/out
  reg signed      [wi-1:0] sub1In0, sub1In1, sub1Out; // Sub1 in/out
  reg signed      [wi+16:0] mulAddFull, mulSubFull;   // Full-precision products + rounding
  reg signed      [wi-1:0] mulOutAdd, mulOutSub;  // Multiplier outputs
  reg           [2*wi-1:0] psOut[7:0];            // Perfect shuffle in and out
  wire signed      [w-1:0] sOut0SatRe, sOut0SatIm;// Saturated shifter output
  reg            [2*w-1:0] y;                     // Declare output registered

  // Assign processing control
  assign fftMode  = run1[0];      // 0/1 = FFT/IFFT
  assign fftRun   = run1[1];      // 0/1 = off/on

  // Input/output control -  input is always ready, output after processing
  assign x_ir = fftRun;           // Input ready when not switched off
  assign y_or = fsmOutD;          // Write data when output FSM is active (delayed)

  // --------------------------------------------------------------------------------------
  // FFT-8 processor, combinatorial logic
  //
  // Works on four complex values per processing step, selected by cntProc:
  //   cntProc 0, 4, 6 -> fIn[0..3]      cntProc 1, 2, 3, 5, 7 -> fIn[4..7]
  // Values 0/1 always go through a plain butterfly. Values 2/3 go through
  //   cntProc 0, 1, 4, 5 : butterfly with 90 degree rotation
  //   cntProc 2, 3       : 1/sqrt(2) scaling of value 3, value 2 passed through
  //   cntProc 6, 7       : plain butterfly
  //
  // All intermediate regs are assigned before they are read, so the explicit
  // sensitivity list only needs the true inputs of the block.
  // --------------------------------------------------------------------------------------
  //always @(*) begin
  always @(cntProc or fftMode or fIn[0] or fIn[1] or fIn[2] or
           fIn[3] or fIn[4] or fIn[5] or fIn[6] or fIn[7] or
           fOutBuf[0] or fOutBuf[1] or fOutBuf[2] or fOutBuf[3]) begin

    // Input data selection (lower half of each word = real, upper half = imag)
    if (cntProc==0 | cntProc==4 | cntProc==6) begin
      xRe[0] = fIn[0][wi-1:0]; xIm[0] = fIn[0][2*wi-1:wi];
      xRe[1] = fIn[1][wi-1:0]; xIm[1] = fIn[1][2*wi-1:wi];
      xRe[2] = fIn[2][wi-1:0]; xIm[2] = fIn[2][2*wi-1:wi];
      xRe[3] = fIn[3][wi-1:0]; xIm[3] = fIn[3][2*wi-1:wi];
    end else begin
      xRe[0] = fIn[4][wi-1:0]; xIm[0] = fIn[4][2*wi-1:wi];
      xRe[1] = fIn[5][wi-1:0]; xIm[1] = fIn[5][2*wi-1:wi];
      xRe[2] = fIn[6][wi-1:0]; xIm[2] = fIn[6][2*wi-1:wi];
      xRe[3] = fIn[7][wi-1:0]; xIm[3] = fIn[7][2*wi-1:wi];
    end

    // Simple butterfly for the inputs/outputs 0/1
    yRe[0] = xRe[0] + xRe[1];
    yIm[0] = xIm[0] + xIm[1];
    yRe[1] = xRe[0] - xRe[1];
    yIm[1] = xIm[0] - xIm[1];

    // Add/sub inputs
    if (cntProc==6 | cntProc==7) begin
      // Plain butterfly on values 2/3
      add0In0 = xRe[2]; add0In1 = xRe[3];
      sub0In0 = xRe[2]; sub0In1 = xRe[3];
      add1In0 = xIm[2]; add1In1 = xIm[3];
      sub1In0 = xIm[2]; sub1In1 = xIm[3];
    end else if (cntProc==0 | cntProc==1 | cntProc==4 | cntProc==5) begin
      // Rotated butterfly: the mode selects which difference is negated
      add0In0 = xRe[2]; add0In1 = xRe[3];
      add1In0 = xIm[2]; add1In1 = xIm[3];
      if (fftMode==0) begin
        sub0In0 = xRe[3]; sub0In1 = xRe[2];   // FFT
        sub1In0 = xIm[2]; sub1In1 = xIm[3];
      end else begin
        sub0In0 = xRe[2]; sub0In1 = xRe[3];   // IFFT
        sub1In0 = xIm[3]; sub1In1 = xIm[2];
      end
    end else begin
      // cntProc 2/3: sum and difference of real/imag of value 3 feed the multiplier
      if (fftMode==0) begin
        add0In0 = xIm[3]; add0In1 = xRe[3];   // FFT
        sub0In0 = xIm[3]; sub0In1 = xRe[3];
      end else begin
        add0In0 = xIm[3]; add0In1 = xRe[3];   // IFFT
        sub0In0 = xRe[3]; sub0In1 = xIm[3];
      end
      // Not important (add1Out/sub1Out are unused in this step), but assigned
      // so that the block stays purely combinatorial
      add1In0 = xIm[2]; add1In1 = xIm[3];
      sub1In0 = xIm[2]; sub1In1 = xIm[3];
    end

    // Define add/sub
    add0Out = add0In0 + add0In1;
    add1Out = add1In0 + add1In1;
    sub0Out = sub0In0 - sub0In1;
    sub1Out = sub1In0 - sub1In1;

    // Define multiplication by 1/sqrt(2), include rounding.
    // 46341 ~= 65536/sqrt(2); adding 32768 rounds before the shift by 16.
    // The product is formed in an explicitly sized (wi+17)-bit signed register:
    // the unsized constants alone would only give a 32-bit evaluation, which
    // wraps for large operands when wi > 16. Results are unchanged for wi <= 16.
    mulAddFull = add0Out*46341 + 32768;
    mulSubFull = sub0Out*46341 + 32768;
    mulOutAdd  = mulAddFull >>> 16;
    mulOutSub  = mulSubFull >>> 16;

    // Assign FFT processor outputs
    if (cntProc==6 | cntProc==7) begin
      yRe[2] = add0Out;           // Normal butterfly
      yIm[2] = add1Out;
      yRe[3] = sub0Out;
      yIm[3] = sub1Out;
    end else if (cntProc==0 | cntProc==1 | cntProc==4 | cntProc==5) begin
      yRe[2] = add0Out;           // Butterfly with 90 degree rotation
      yIm[2] = add1Out;
      yRe[3] = sub1Out;           // Real/imag differences are swapped
      yIm[3] = sub0Out;
    end else begin
      if (fftMode==0) begin       // Scaled value 3 goes to output 2 ...
        yRe[2] = mulOutAdd;
        yIm[2] = mulOutSub;
      end else begin
        yRe[2] = mulOutSub;
        yIm[2] = mulOutAdd;
      end
      yRe[3] = xRe[2];            // ... and value 2 passes through to output 3
      yIm[3] = xIm[2];
    end

    // Assign outputs (real/imag combined)
    fOut[0] = {yIm[0], yRe[0]};
    fOut[1] = {yIm[1], yRe[1]};
    fOut[2] = {yIm[2], yRe[2]};
    fOut[3] = {yIm[3], yRe[3]};

    // Assign perfect shuffle feedback network:
    // buffered results of the previous step and the direct results of the
    // current step are interleaved into the eight processor slots
    psOut[0] = fOutBuf[0];
    psOut[4] = fOutBuf[1];
    psOut[1] = fOutBuf[2];
    psOut[5] = fOutBuf[3];
    psOut[2] = fOut[0];
    psOut[6] = fOut[1];
    psOut[3] = fOut[2];
    psOut[7] = fOut[3];

  end // FFT-8 processor, combinatorial logic

  // --------------------------------------------------------------------------------------
  // Input FSM: counts the words read and keeps the last seven of them in sIn
  // (sIn[0] = oldest). cntIn wraps after 8 words because it is 3 bits wide.
  // --------------------------------------------------------------------------------------
  always @(posedge clk) begin
    if (fftRun==0) begin
      cntIn <= 0;                   // Reset input data counter
    end else begin
      if (x_re==1) begin            // If data is read
        cntIn  <= cntIn+1;          //  Count up input data
        sIn[6] <= x;                //  Save up to 7 complex words
        for (kIn=0; kIn<6; kIn=kIn+1) begin //  in a shift register
          sIn[kIn] <= sIn[kIn+1];
        end
      end
    end
  end //always

  // --------------------------------------------------------------------------------------
  // Processing FSM: loads the processor input registers when the 8th input word
  // arrives and then sequences the eight processing steps. A new start has
  // priority over the running sequence.
  // --------------------------------------------------------------------------------------
  always @(posedge clk) begin
    if (fftRun==0) begin
      cntProc <= 0;                     // Counter states processing data
      fsmProc <= 0;                     // Processing stopped
    end else begin

      // Processing start
      if (x_re==1 & cntIn==7) begin     // When 7+1 input values are available
        fsmProc <= 1;                   // -> Start processing
        cntProc <= 0;                   // Reset the processing counter (just in case)
        fIn[0]  <= {{(wi-w){sIn[0][2*w-1]}}, sIn[0][2*w-1:w],  // }
                    {(wi-w){sIn[0][w-1]}},   sIn[0][w-1:0]};   // }
        fIn[4]  <= {{(wi-w){sIn[1][2*w-1]}}, sIn[1][2*w-1:w],  // }
                    {(wi-w){sIn[1][w-1]}},   sIn[1][w-1:0]};   // } Copy the value from the
        fIn[2]  <= {{(wi-w){sIn[2][2*w-1]}}, sIn[2][2*w-1:w],  // } shift register to the
                    {(wi-w){sIn[2][w-1]}},   sIn[2][w-1:0]};   // } processor input
        fIn[6]  <= {{(wi-w){sIn[3][2*w-1]}}, sIn[3][2*w-1:w],  // } registers using bit
                    {(wi-w){sIn[3][w-1]}},   sIn[3][w-1:0]};   // } reverse addressing and
        fIn[1]  <= {{(wi-w){sIn[4][2*w-1]}}, sIn[4][2*w-1:w],  // } sign extension of real/imag
                    {(wi-w){sIn[4][w-1]}},   sIn[4][w-1:0]};   // } parts
        fIn[5]  <= {{(wi-w){sIn[5][2*w-1]}}, sIn[5][2*w-1:w],  // }
                    {(wi-w){sIn[5][w-1]}},   sIn[5][w-1:0]};   // }
        fIn[3]  <= {{(wi-w){sIn[6][2*w-1]}}, sIn[6][2*w-1:w],  // }
                    {(wi-w){sIn[6][w-1]}},   sIn[6][w-1:0]};   // }
        fIn[7]  <= {{(wi-w){     x[2*w-1]}},      x[2*w-1:w],  // } Copy the last value directly
                    {(wi-w){     x[w-1]}},        x[w-1:0]};   // }
      end else

      // Processing run
      if (fsmProc==1) begin

        cntProc <= cntProc+1;                   // Increase process counter
        for (kProc=0; kProc<4; kProc=kProc+1) begin // Buffer processor output,
          fOutBuf[kProc] <= fOut[kProc];            // always active
        end

        if (cntProc==1 | cntProc==5) begin      // Perfect shuffle feedback after
          for (kProc=0; kProc<8; kProc=kProc+1) begin // 2nd and 6th processing block
            fIn[kProc] <= psOut[kProc];
          end
        end else if (cntProc==2 | cntProc==3) begin
           fIn[6] <= fOut[2];                   // Special feedback for 1/sqrt(2)
           fIn[7] <= fOut[3];                   // scaling after 3rd/4th block
        end else if (cntProc==7) begin // Reset FSM at the end (if not restarted
          fsmProc <= 0;                // above due to new input data)
        end

      end // if (fsmProc==1) begin
    end // if (fftRun==0) ... else
  end // Processing FSM

  // --------------------------------------------------------------------------------------
  // Output FSM: captures the shuffled result in the last processing step and
  // then shifts it out word by word; a new capture has priority over shifting.
  // --------------------------------------------------------------------------------------
  always @(posedge clk) begin
    if (fftRun==0) begin
      cntOut <= 0;                      // Counter states output data
      fsmOut <= 0;                      // Output stopped
    end else begin

      // Output start
      if (cntProc==7) begin             // When processing is in its last state
        fsmOut <= 1;                    // -> Start output
        cntOut <= 0;                    // -> Reset output counter
        for (kOut=0; kOut<8; kOut=kOut+1) begin // -> Use perfect shuffled output
          sOut[kOut] <= psOut[kOut];            // -> Load it into output shift chain
        end
      end else

      if (fsmOut==1) begin              // Output state machine
        cntOut <= cntOut+1;
        for (kOut=0; kOut<7; kOut=kOut+1) begin // Shift the output values to
          sOut[kOut] <= sOut[kOut+1];           // the output y, mapping is
        end                                     // sOut[0] -> sat -> y
        if (cntOut==7) begin            // Reset output state machine, if
          fsmOut <= 0;                  // it is not restarted again above
        end
      end //if (fsmOut==1)
    end //if (fftRun==0) ... else
  end // Output FSM

  // Saturate from wi -> w bits, register output.
  // The LSB of each component is dropped before saturation (slices [wi-1:1] and
  // [2*wi-1:wi+1]), so the sat input is wi-1 bits wide. The sat module itself is
  // defined outside this file.
  sat satRe(sOut0SatRe, sOut[0][wi-1:1]);
  defparam satRe.y_w = w;
  defparam satRe.x_w = wi-1;

  sat satIm(sOut0SatIm, sOut[0][2*wi-1:wi+1]);
  defparam satIm.y_w = w;
  defparam satIm.x_w = wi-1;

  // Output register; fsmOutD is delayed by the same clock so that y_or lines up with y.
  // Neither register is cleared by fftRun.
  always @(posedge clk) begin
    y       <= {sOut0SatIm, sOut0SatRe};
    fsmOutD <= fsmOut;
  end

endmodule
//=========================================================================================

