//====================================================================================
//  Copyright (C) BAY9, 2016
//====================================================================================
//
// MODULE:
//   fft64
//
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

// fft64 - two-pass FFT built around one shared fft8_0 instance, one twiddle_0
// instance and two 64-entry memories (mem0, mem1).
//
// Data flow as implemented below:
//   1. Input samples x are written to mem0 at address cntIn (optionally
//      offset by inpOrd_q<<5, optionally negated on odd cntIn).
//   2. Pass 1: mem0 is read with the two 3-bit address groups swapped, fed
//      through fft8_0 and twiddle_0, and written to mem1 at cntProc1[5:0].
//   3. Pass 2: mem1 is read with swapped address groups, fed through fft8_0
//      again, and written back to mem1 at the swapped cntProc3 address.
//   4. Output: mem1 is read linearly with cntOut and driven onto y.
//
// Ports:
//   y        - output data word (y_w bits)
//   y_or     - output ready, driven by fsmOut
//   y_ff     - input; while 1 the output FSM holds its position
//   y_we     - y_or & ~y_ff
//   x        - input data word (x_w bits); lower w bits are negated as xReN,
//              bits [2*w-1:w] as xImN
//   x_ir     - input ready
//   x_fe     - input; while 1 no input word is taken
//   x_re     - x_ir & ~x_fe, also the mem0 write enable
//   regBus   - 16-bit bidirectional bus connected to all ereg instances
//   regWe    - register write strobe passed to the ereg instances
//   regRe    - register read strobe passed to the ereg instances
//   regWeOut - OR of fft8_0_regWe and twiddle_0_regWe
//   regReOut - OR of fft8_0_regRe and twiddle_0_regRe
//   clk1     - clock used for all logic in this file (aliased to clk)
//   clk2     - not referenced in this file; NOTE(review): may be used by the
//              included fft8_0.v / twiddle_0.v - confirm
//   reset    - passed to the ereg instances
//
// NOTE(review): the fft8_0_* and twiddle_0_* nets used below are not declared
// in this file; presumably they come from the included fft8_0.v and
// twiddle_0.v - confirm before renaming anything in this module.
module fft64(y, y_or, y_ff, y_we, 
             x, x_ir, x_fe, x_re, 
             regBus, regWe, regRe, regWeOut, regReOut, clk1, clk2, reset);

  // --------------------------------------------------------------------------------------
  // Module parameters
  // All defaults are 0; presumably every parameter is overridden by the
  // instantiating module (a width of 0 would give a [-1:0] range) - confirm.
  parameter y_w               = 0;   // Width of output y
  parameter x_w               = 0;   // Width of input x


  // For each register NAME the four parameters below are forwarded to the
  // matching ereg instance: NAME_w -> w, NAME_r -> rval, NAME_s -> sgn,
  // R_NAME -> adr. Presumably width, reset value, signedness and bus
  // address - verify against the ereg module.
  parameter run1_w            = 0;
  parameter run1_r            = 0;
  parameter run1_s            = 0;
  parameter R_run1            = 0;
  parameter tglMode_w         = 0;
  parameter tglMode_r         = 0;
  parameter tglMode_s         = 0;
  parameter R_tglMode         = 0;
  parameter inpOrd_w          = 0;
  parameter inpOrd_r          = 0;
  parameter inpOrd_s          = 0;
  parameter R_inpOrd          = 0;
  parameter outpOrd_w         = 0;
  parameter outpOrd_r         = 0;
  parameter outpOrd_s         = 0;
  parameter R_outpOrd         = 0;

  // Width of one data component; memory words are 2*w bits wide
  parameter w                 = 0;

  // --------------------------------------------------------------------------------------
  // Inputs and outputs
  // Output port: a word counts as written when it is ready and y_ff is low
  output            [y_w-1:0]  y;
  output                       y_or;
  input                        y_ff;
  output                       y_we;
  assign                       y_we = y_or & ~y_ff;

  // Input port: a word counts as read when this block is ready and x_fe is low
  input             [x_w-1:0]  x;
  output                       x_ir;
  input                        x_fe;
  output                       x_re;
  assign                       x_re = x_ir & ~x_fe;


  // Define clock and reset
  input                        clk1;
  input                        clk2;
  input                        reset;

  // Inputs and outputs for registers
  inout                [15:0]  regBus;
  input                        regWe;
  input                        regRe;
  output                       regWeOut;
  output                       regReOut;

  // Wires for external registers
  // For each register: _q is the value used by the logic below, _d and
  // _weint are handed to the ereg instance together (presumably internal
  // write data and internal write enable - verify against ereg).
  wire           [run1_w-1:0]  run1_q;
  wire           [run1_w-1:0]  run1_d;
  wire                         run1_weint;

  // tglMode, inpOrd and outpOrd have their _weint tied to 0 and their _d
  // wires are not driven anywhere in this file.
  wire        [tglMode_w-1:0]  tglMode_q;
  wire        [tglMode_w-1:0]  tglMode_d;
  wire                         tglMode_weint;
  assign                       tglMode_weint = 0;

  wire         [inpOrd_w-1:0]  inpOrd_q;
  wire         [inpOrd_w-1:0]  inpOrd_d;
  wire                         inpOrd_weint;
  assign                       inpOrd_weint = 0;

  wire        [outpOrd_w-1:0]  outpOrd_q;
  wire        [outpOrd_w-1:0]  outpOrd_d;
  wire                         outpOrd_weint;
  assign                       outpOrd_weint = 0;

  // Assign clock - everything in this file runs on clk1
  wire clk;
  assign clk = clk1;

  // --------------------------------------------------------------------------------------
  // External status registers
  // Four ereg instances share regBus, regWe, regRe, clk and reset; each is
  // configured through defparam with the parameters declared above.
  ereg run1(run1_q, regBus, run1_d, run1_weint, regWe, regRe, clk, reset);
  defparam run1.w    = run1_w;
  defparam run1.rval = run1_r;
  defparam run1.sgn  = run1_s;
  defparam run1.adr  = R_run1;

  ereg tglMode(tglMode_q, regBus, tglMode_d, tglMode_weint, regWe, regRe, clk, reset);
  defparam tglMode.w    = tglMode_w;
  defparam tglMode.rval = tglMode_r;
  defparam tglMode.sgn  = tglMode_s;
  defparam tglMode.adr  = R_tglMode;

  ereg inpOrd(inpOrd_q, regBus, inpOrd_d, inpOrd_weint, regWe, regRe, clk, reset);
  defparam inpOrd.w    = inpOrd_w;
  defparam inpOrd.rval = inpOrd_r;
  defparam inpOrd.sgn  = inpOrd_s;
  defparam inpOrd.adr  = R_inpOrd;

  ereg outpOrd(outpOrd_q, regBus, outpOrd_d, outpOrd_weint, regWe, regRe, clk, reset);
  defparam outpOrd.w    = outpOrd_w;
  defparam outpOrd.rval = outpOrd_r;
  defparam outpOrd.sgn  = outpOrd_s;
  defparam outpOrd.adr  = R_outpOrd;

  // --------------------------------------------------------------------------------------
  // Included instances
  // The included text is inserted into this module's scope, so it can see
  // (and may depend on) the names declared in this file.
  `include "fft8_0.v"
  `include "twiddle_0.v"

  // --------------------------------------------------------------------------------------
  // Define register control output
  // OR together the register strobes of the included instances; the leading
  // 0 is a neutral first term of the OR chain.
  assign regWeOut = 0 | fft8_0_regWe | twiddle_0_regWe;
  assign regReOut = 0 | fft8_0_regRe | twiddle_0_regRe;

// ----------------------------------------------------------------------------------------
//=========================================================================================

//-----------------------------------------------------------------------------------------
  // DP memory definition
  // Two 64-entry memories of 2*w-bit words, each with its own write address,
  // read address and write enable.
  //   memXQ  - registered read data
  //   memXQD - saved copy of the read data, used while a reader is stalled
  reg  [2*w-1:0] mem0[63:0], mem1[63:0];
  reg  [2*w-1:0] mem0Q, mem1Q, mem0QD, mem1QD;
  wire [2*w-1:0] mem0D, mem1D;
  wire     [5:0] wrAdr0, wrAdr1;
  wire     [5:0] rdAdr0, rdAdr1;
  wire           we0, we1;
  wire           mem1WrSel, mem1RdSel;
    
  // DP memory operation
  // Synchronous write and registered read, so read data appears one clock
  // after the address. Because the write is non-blocking, a read of the
  // address being written in the same cycle returns the previous content.
  always @(posedge clk) begin
    if (we0) begin
      mem0[wrAdr0] <= mem0D;
    end          
    mem0Q <= mem0[rdAdr0];
    if (we1) begin
      mem1[wrAdr1] <= mem1D;
    end          
    mem1Q <= mem1[rdAdr1];
  end

  // Define other internal registers, wires etc...
  //   cntIn    - number of input words written to mem0 (wraps at 64)
  //   cntOut   - read pointer of the output FSM
  //   cntProc0 - read pointer of pass 1 (mem0 -> fft8_0)
  //   cntProc1 - count of twiddle_0 outputs (7 bit, wraps at 128)
  //   cntProc2 - read pointer of pass 2 (mem1 -> fft8_0)
  //   cntProc3 - count of fft8_0 outputs (7 bit, wraps at 128)
  //   cntDiff  - cntOut - cntProc0 - 1, bit 6 is the borrow/sign indication
  //   waitProc2- delay counter between the end of pass 1 and start of pass 2
  //   fsmProcN / fsmOut - "active" flags of the individual stages
  //   y_ffD, enProc0D, fsmProc0D - one-cycle delayed copies
  reg   [2*w-1:0] yOut;             // Not referenced anywhere in this file
  reg       [5:0] cntIn, cntOut;
  reg       [5:0] cntProc0, cntProc2;  
  reg       [6:0] cntProc1, cntProc3;
  wire      [6:0] cntDiff;  
  reg       [2:0] waitProc2;
  reg       [0:0] fsmProc0, fsmProc2, fsmOut, fsmProc0D;
  wire            fftRun, fsmProc1, fsmProc3, enProc0;
  reg             y_ffD, enProc0D;
  wire  [2*w-1:0] xN;
  wire    [w-1:0] xReN, xImN;
    
  // Assign processing control
  // Bit 1 of run1 enables this block; the complete run1 value is forwarded
  // to the included fft8_0 and twiddle_0 instances.
  assign fftRun         = run1_q[1];      // 0/1 = off/on
  assign fft8_0_run1    = run1_q;
  assign twiddle_0_run1 = run1_q;
   
  // Input/output control - input is ready while the block is running and
  // pass 1 is not reading mem0; output becomes ready after processing
  assign x_ir = fftRun & fsmProc0==0;    // Input ready when data is not read from mem0  
  
  // Input FSM - count the words written to mem0
  always @(posedge clk) begin
    if (fftRun==0) begin
      cntIn <= 0;                   // Reset input data counter
    end else begin
      if (x_re==1) begin            // If data is read
        cntIn <= cntIn+1;           //  Count up input data (6 bit, 63 wraps to 0)
      end
    end 
  end // Input data FSM
  
  // Enable FSM 0 only if the output has read or is not reading mem1: the
  // counter of the output must be >= the counter reading data and feeding
  // the fft8 -> twiddle chain.
  // cntDiff[6]==0 means cntOut - cntProc0 - 1 did not go negative, i.e.
  // cntOut > cntProc0. Pass 1 is also enabled whenever neither pass-2 write
  // (fsmProc3) nor the output (fsmOut) is active.
  assign  cntDiff = {1'b0, cntOut} - {1'b0, cntProc0} - 1;
  assign  enProc0 = (fsmProc3==0 & fsmOut==0) | (cntDiff[6]==0);

  // Processing FSM0 - read data from mem0, feed into fft8_0 (1)
  // The if / else-if / else chain below covers reset, start and run;
  // enProc0D and fsmProc0D at the end are updated on every clock.
  always @(posedge clk) begin
    if (fftRun==0) begin
      cntProc0 <= 0;                // Reset the pass-1 read counter
      fsmProc0 <= 0;                // Processing stopped                         
    end else                                                                    
                                                                                      
    // Processing start                                                             
    if (x_re==1 & cntIn==63) begin  // When 63+1 input values are available        
      fsmProc0 <= 1;                // -> Start processing                        
      cntProc0 <= 1;                // Next read address index is 1
      mem0QD   <= mem0Q;            // Set mem0QD = mem0[0]
    end else begin

      // Processing run - advance only while enabled
      if (fsmProc0==1 & enProc0==1) begin
        if (cntProc0==0) begin
          fsmProc0 <= 0;            // Counter wrapped from 63 to 0 -> done
        end else begin
          cntProc0 <= cntProc0+1;   // Increase process counter
        end
      end // if (fsmProc0==1 & enProc0==1)
      if (enProc0D==1) begin        // Keep memory output delayed
        mem0QD   <= mem0Q;
      end
    
    end // if (fftRun==0) ... else
    enProc0D <= enProc0;            // Delay also the enable
    fsmProc0D <= fsmProc0;          // Delay the proc0 indication
  end // Processing FSM
  
  // Negated copy of input x, used conditionally below to shift the output
  // by 32 samples. Both w-bit halves of x are negated separately.
  assign xReN = -x[  w-1:0];
  assign xImN = -x[2*w-1:w];
  assign xN   = {xImN, xReN};

  // Assign writing to / reading from mem0
  // The negated word is stored for odd cntIn when outpOrd_q is non-zero.
  // inpOrd_q<<5 adds an offset to the write address; the sum is truncated
  // to the 6-bit wrAdr0.
  // rdAdr0 swaps the lower and upper 3-bit groups of cntProc0.
  assign mem0D  = (cntIn[0]==0 | outpOrd_q==0) ? x : xN;  // Write input data
  assign we0    = x_re;                                   // to mem0
  assign wrAdr0 = cntIn + (inpOrd_q<<5);                  // Write data normally
  assign rdAdr0 = {cntProc0[2:0], cntProc0[5:3]};         // Read with 3-bit groups swapped

  // Assign reading from mem0/1 -> feed fft8_0
  // For proc2, simply feed mem1Q into fft8, for proc0, check if
  // the process is currently disabled, possibly feed the saved
  // memory output to compensate the delay.
  // fft8_0_x_fe is low only while pass 1 is active and enabled, or while
  // pass 2 is active.
  assign fft8_0_x    = fsmProc2==0 ? (enProc0D ? mem0Q : mem0QD) : mem1Q;
  assign fft8_0_x_fe = ~((fsmProc0 & enProc0) | fsmProc2);

  // Connect fft8_0 -> twiddle_0
  // twiddle_0 receives every fft8_0 output word.
  assign twiddle_0_x      = fft8_0_y;
  assign twiddle_0_x_fe   = ~fft8_0_y_or;
  assign fft8_0_y_ff      = 0; // No control at the output of
  assign twiddle_0_y_ff   = 0; // fft8 and twiddle
  
  // Processing FSM 1 - write twiddle output data to mem1
  // cntProc1 counts twiddle_0 outputs and supplies the mem1 write address.
  always @(posedge clk) begin
    if (fftRun==0) begin
      cntProc1 <= 0;
    end else                                                                    
    if (twiddle_0_y_or==1) begin
      cntProc1 <= cntProc1+1;      // Count twiddle outputs
    end                                                                        
  end // Processing FSM 1
  // Active for counts 0..63: from the first twiddle output (count still 0
  // but twiddle_0_y_or set) until bit 6 of the counter becomes 1.
  assign fsmProc1 = cntProc1[6]==0 & (cntProc1!=0 | twiddle_0_y_or==1);
  
  // Processing FSM2 - read mem1, feed fft8_0 (2)
  // Priority chain: reset, detect end of FSM0, wait, start, run.
  always @(posedge clk) begin
    
    // Reset FSM2
    if (fftRun==0) begin
      cntProc2  <= 0;
      fsmProc2  <= 0;      
      waitProc2 <= 0;
    end else                                                               
     
    // Wait for the end of FSM0 (falling edge of fsmProc0)
    if (fsmProc0D==1 & fsmProc0==0) begin
      waitProc2 <= 1;
    end else      

    // Count waitProc2 up to 5 to let the pipeline finish
    if (waitProc2!=5 & waitProc2!=0) begin
      waitProc2 <= waitProc2+1;
    end else      

    // Processing start - additionally held off until cntOut is 0
    if (waitProc2==5 & cntOut==0) begin
      fsmProc2  <= 1;       
      cntProc2  <= 1;       
      waitProc2 <= 0;
    end else

    // Processing run
    if (fsmProc2==1) begin
      if (cntProc2==0) begin
        fsmProc2 <= 0;              // Counter wrapped from 63 to 0 -> done
      end else begin
        cntProc2 <= cntProc2+1;
      end
    end // end processing run
  end // Processing FSM 2
      
  // Processing FSM 3 - write fft8 output data to mem1 (2nd run)
  // cntProc3 counts all fft8_0 outputs; the write to mem1 is only enabled
  // while fsmProc3 is set.
  always @(posedge clk) begin
    if (fftRun==0) begin
      cntProc3 <= 0;
    end else
    if (fft8_0_y_or==1) begin   // Count fft8 output data
      cntProc3 <= cntProc3+1;   // Serves as address pointer
    end                                                                        
  end // Processing FSM 3
  // Active for counts 64..127; at exactly 64 only while an fft8_0 output is
  // present.
  assign fsmProc3 = cntProc3[6]==1 & (cntProc3!=64 | fft8_0_y_or==1);

  // Output FSM - read data normally from mem1
  // Chain: reset, start, run; y_ffD at the end is updated on every clock.
  always @(posedge clk) begin
    if (fftRun==0) begin
      cntOut <= 0;
      fsmOut <= 0;
    end else
           
    // Processing start - when cntProc3 reaches 114 (= 64 + 50), i.e. after
    // 50 fft8_0 outputs beyond count 64 have been counted
    if (cntProc3==114) begin
      fsmOut <= 1;            // Output is ready if fsmOut==1   
      cntOut <= 1;            // Set rdAdr1=1
      mem1QD <= mem1Q;        // Set mem1QD = mem1[0]
    end else begin
      
      // Processing run - halt if output is blocked
      if (fsmOut==1 & y_ff==0) begin
        if (cntOut==0) begin   // Reset FSM at the end (if not restarted
          fsmOut <= 0;         // above due to new input data)
        end else begin
          cntOut <= cntOut+1;  // Increase process counter
        end
      end
      if (y_ffD==0) begin       // Block writing to delayed output in order
        mem1QD <= mem1Q;        // to keep the old memory out for later
      end                       // reading if the output y is blocked

    end
    y_ffD   <= y_ff;            // Use delayed version of y_ff for some control
  end // Processing FSM
  
  // Assign writing to mem1
  // While fsmProc1 is active the twiddle output is written at the linear
  // address cntProc1[5:0]; otherwise the fft8_0 output is written at the
  // cntProc3 address with its two 3-bit groups swapped.
  assign mem1WrSel = fsmProc1;
  assign mem1D     = mem1WrSel==1 ? twiddle_0_y : fft8_0_y;    
  assign we1       = (fsmProc1 & twiddle_0_y_we) | (fsmProc3 & fft8_0_y_we); 
  assign wrAdr1    = mem1WrSel==1 ? cntProc1[5:0]
                                  : {cntProc3[2:0], cntProc3[5:3]};
  // Assign reading from mem1
  // While pass 2 is active (fsmProc2==1) read with the swapped cntProc2
  // address, otherwise read linearly with cntOut.
  //assign mem1RdSel = fsmOut;
  assign mem1RdSel = ~fsmProc2;
  assign rdAdr1    = mem1RdSel==0 ? {cntProc2[2:0], cntProc2[5:3]} : cntOut;

  // Possibly use saved version for output if it was halted before
  assign y    = (y_ffD==0) ? mem1Q : mem1QD;
  assign y_or = fsmOut;
  
  // FFT/IFFT auto toggling operation
  // run1 is rewritten with run1_q XOR {1'b0, tglMode_q} in the cycle where
  // cntOut==6 and an output word is written (y_we). The MSB of the XOR mask
  // is 0. NOTE(review): with a 1-bit tglMode and 2-bit run1 this toggles
  // only run1_q[0] and leaves the run bit run1_q[1] unchanged - confirm the
  // register widths.
  assign run1_d     = run1_q ^ {1'b0, tglMode_q};
  assign run1_weint = cntOut==6 & y_we;
  
endmodule
//=========================================================================================

