-------------------------------------------------------------------------------
-- NSD_uart.vhd
--
-- Author : Nial Stewart, Nial Stewart Developments Ltd
--          www.nialstewartdevelopments.co.uk
-- Date   : Feb/08
--
--
--------------------------------------------------------------------------
--
-- NOTE: We have a more 'capable' interface which can handle more than
-- one address byte and 1-4 data bytes. Please contact us for details.
--
--------------------------------------------------------------------------
-- This module is a 'target' interface for a processor, interpreting
-- read/write commands sent on an RS232 interface and translating
-- them to a local bus interface. Writes are actioned, reads are performed
-- with the read data being returned on the serial interface.
--
-- The MSbit of the first byte received is a rd/wr_n bit ('1' = read,
-- '0' = write), the remaining 7 bits are the register address.
--
-- If a write is being performed the next two bytes are the data to be
-- written, this is received then written on the local bus. If a read
-- is being performed the relevant address is read on the local bus and
-- the data sent back to the source.
--
-- RS232 Data for Write...
-- In  -  ...'0' & Address ....MSByte.....LSByte
-- Out -  .......................................
--
-- RS232 Data for Read
-- In  -  ...'1' & Address.......................
-- Out -  .....................MSByte....LSByte
--
--
--
-- All transfers are most significant bits first.
--
-- This module Structure.
--
-- Rs232  -----------------          -------------
-- In     -      Rx       -   --->   -           -      Local Bus
-- ---->  -    Process    -          -           -
--        -----------------          -  Master   -  ----> Addr
--                                    -  Process  -  ----> Dout
-- RS232  -----------------          -           -  <---- Din
-- Out    -      Tx       -   <---   -           -  ----> Rd
-- <----  -    Process    -          -           -  ----> Wr
--        -----------------          -------------
--
--
-- The Rx and Tx processes are what's normally defined as the 'uart'.
--
--------------------------------------------------------------------------


library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;


-- nsd_uart : RS232 'target' interface. Serial read/write commands are
-- translated into accesses on a 7 bit address / 16 bit data local bus.
entity nsd_uart is
generic(
      -- Number of clk cycles in one serial bit period.
      CLK_TICKS_PER_BIT : in integer range 1 to 1024 := 434 -- Default for 50MHz system clock
      );

port( clk               : in std_logic; -- All processes are clocked on the rising edge.
      rst               : in std_logic; -- '1' = reset (asynchronous in every process)

      -- rs232 interface
      rx_data           : in std_logic;  -- Serial input, resynchronised to clk internally.
      tx_data           : out std_logic; -- Serial output.

      -- Interface to registers
      lb_address        : out std_logic_vector(6 downto 0); -- Lower 7 bits of the command byte; the rd/wr_n flag (top bit) is not included.
      lb_d_out          : out std_logic_vector(15 downto 0); -- Write data, first received byte in bits 15..8.
      lb_d_in           : in std_logic_vector(15 downto 0);  -- Read data, sampled two clocks after lb_rd is first driven high.
      lb_wr             : out std_logic; -- Write strobe, high for one clk cycle.
      lb_rd             : out std_logic  -- Read strobe, high for one clk cycle.
      );
end nsd_uart;

architecture rtl of nsd_uart is

-------------------------------------------------------------------------------
--
-- The following signals are associated with the top level 'master' process
-- that sequences each command: it takes the command byte and any write data
-- from the Rx process, drives the local bus and queues read data for the
-- Tx side.
--
-------------------------------------------------------------------------------

type master_state_type is (IDLE, GET_ADDRESS, GET_DATA, LB_READ_DATA, LB_WRITE_DATA, RETURN_DATA);
signal master_state     : master_state_type;

-- Values of the rd/wr_n flag carried in the top bit of the command byte.
-- NOTE(review): WRITE is not referenced anywhere in this architecture.
constant WRITE    : std_logic := '0';
constant READ     : std_logic := '1';

signal rd_wrn     : std_logic;                      -- Latched rd/wr_n flag of the current command.
signal writing    : std_logic;                      -- NOTE(review): never driven or read in this architecture.
signal rd_data    : std_logic_vector(15 downto 0);  -- Read data captured from lb_d_in, shifted out a byte at a time.
signal word_count : integer range 0 to 4;           -- Bytes still to be received / queued in the current state.

-- Internal copies of the local bus outputs (the command byte including the
-- rd/wr_n flag is held in lb_address_int, hence 8 bits).
signal lb_address_int  : std_logic_vector(7 downto 0);
signal lb_d_out_int    : std_logic_vector(15 downto 0);
signal lb_rd_int       : std_logic;

-------------------------------------------------------------------------------
--  The following are associated with the serial interface process.
-------------------------------------------------------------------------------

--
-- RX Process
--

type rx_state_type is (IDLE, START_CHECK, RXING, END_DELAY);
signal rx_state         : rx_state_type;

signal rx_bit_count     : integer range 1 to 10;          -- Index of the bit currently being sampled.
signal rx_shift_reg     : std_logic_vector(9 downto 0);   -- Sampled bits are shifted in at bit 0.
signal rxd_byte         : std_logic_vector(7 downto 0);   -- Last received byte, bit order restored.
-- Countdown of clk cycles to the next sample point.
-- NOTE(review): loaded with CLK_TICKS_PER_BIT/2, which is 0 (outside this
-- range) if the generic is set to 1.
signal rx_clk_count     : integer range 1 to 1024;
signal got_byte         : std_logic; -- flag back to master process that a byte's been received.

-- Two stage resynchroniser for rx_data; d_in is the version used by the Rx state machine.
signal rx_data_d        : std_logic;
signal d_in             : std_logic;



--
-- Tx Buffer
--

type tx_fifo_type is array (0 to 7) of std_logic_vector(7 downto 0);
signal tx_fifo          : tx_fifo_type;                   -- 8 entry byte buffer.
signal tx_fifo_wr       : std_logic;                      -- Write strobe from the master process.
signal tx_wr_ptr        : integer range 0 to 7;
signal tx_fifo_rd       : std_logic;                      -- Read strobe from the Tx output process.
signal tx_rd_ptr        : integer range 0 to 7;
signal tx_fifo_fill     : integer range 0 to 8;           -- Number of bytes currently held.
signal fifo_overflow    : std_logic;                      -- Set when a write arrives with the buffer full; not read elsewhere in this architecture.
signal transmit_byte    : std_logic_vector(7 downto 0);   -- Byte to be written to the buffer.
signal next_op_byte     : std_logic_vector(7 downto 0);   -- Byte at the read pointer.

type tx_state_type is (IDLE, SENDING, END_WAIT);
signal tx_state         : tx_state_type;
signal tx_shift_reg     : std_logic_vector(9 downto 0);   -- Bit 9 is the bit currently driven out.
signal tx_bit_count     : integer range 1 to 10;
signal tx_clk_count     : integer range 1 to 1024;        -- Countdown of clk cycles within the current bit period.

begin


-------------------------------------------------------------------------------
--
-- Top Level Master Process
--
-- This drives the serial interface processes. Everything is
-- fairly self explanatory.
-------------------------------------------------------------------------------

-- Master sequencer.
--
-- IDLE          : wait for the command byte (rd/wr_n flag + 7 bit address).
-- GET_ADDRESS   : word_count is 0 on entry, so this immediately branches to
--                 a local bus read or to collecting two write data bytes.
-- GET_DATA      : shift two received bytes into lb_d_out_int, MSByte first.
-- LB_WRITE_DATA : lb_wr has been high for one cycle, drop it and finish.
-- LB_READ_DATA  : drop lb_rd, then capture lb_d_in one cycle later.
-- RETURN_DATA   : push the two bytes of rd_data into the Tx buffer.
master_fsm : process(clk, rst)
begin
  if rst = '1' then
    master_state    <= IDLE;
    word_count      <= 1;
    rd_wrn          <= '0';
    lb_wr           <= '0';
    lb_rd_int       <= '0';
    tx_fifo_wr      <= '0';
    lb_address_int  <= (others => '0');
    lb_d_out_int    <= (others => '0');
    rd_data         <= (others => '0');
    transmit_byte   <= (others => '0');
  elsif rising_edge(clk) then
    case master_state is

      when IDLE =>
        if got_byte = '1' then
          -- The whole command byte is kept; bit 7 is the rd/wr_n flag.
          lb_address_int <= rxd_byte;
          rd_wrn         <= rxd_byte(7);
          word_count     <= 0;
          master_state   <= GET_ADDRESS;
        end if;

      when GET_ADDRESS =>
        if word_count /= 0 then
          -- Further address bytes (not used with a single address byte).
          if got_byte = '1' then
            lb_address_int <= rxd_byte;
            word_count     <= word_count - 1;
          end if;
        elsif rd_wrn = READ then
          lb_rd_int    <= '1';
          master_state <= LB_READ_DATA;
        else
          word_count   <= 2;
          master_state <= GET_DATA;
        end if;

      when GET_DATA =>
        if word_count /= 0 then
          if got_byte = '1' then
            -- Previous byte moves to the top half, new byte enters at the bottom.
            lb_d_out_int <= lb_d_out_int(7 downto 0) & rxd_byte;
            word_count   <= word_count - 1;
          end if;
        else
          lb_wr        <= '1';
          master_state <= LB_WRITE_DATA;
        end if;

      when LB_READ_DATA =>
        if lb_rd_int = '1' then
          -- First cycle: end the read strobe, data is taken on the next cycle.
          lb_rd_int <= '0';
        else
          rd_data      <= lb_d_in;
          word_count   <= 2;
          master_state <= RETURN_DATA;
        end if;

      when LB_WRITE_DATA =>
        lb_wr        <= '0';
        master_state <= IDLE;

      when others =>  -- RETURN_DATA
        lb_wr     <= '0';
        lb_rd_int <= '0';

        if word_count /= 0 then
          -- Queue the current top byte, then move the low byte up.
          transmit_byte         <= rd_data(15 downto 8);
          tx_fifo_wr            <= '1';
          rd_data(15 downto 8)  <= rd_data(7 downto 0);
          word_count            <= word_count - 1;
        else
          tx_fifo_wr   <= '0';
          master_state <= IDLE;
        end if;

    end case;
  end if;
end process master_fsm;

-- Local bus outputs. The rd/wr_n flag (bit 7) is dropped from the address.
lb_address <= lb_address_int(6 downto 0);
lb_d_out   <= lb_d_out_int;
lb_rd      <= lb_rd_int;

--------------------------------------------------------------------------------
--
--  Serial Interface processes
--
--------------------------------------------------------------------------------


-------------------------------------------------------------------------------
-- Rx Process. This process just sits waiting for characters and flags to the
-- master process when one's received properly.
-------------------------------------------------------------------------------


-- Serial receiver.
--
-- rx_data is passed through two registers (rx_data_d, d_in) before use.
-- A low level in IDLE is re-checked half a bit period later; if still low,
-- nine samples are taken one bit period apart (8 data bits + stop bit).
-- The following state restores the bit order into rxd_byte and raises
-- got_byte, which IDLE clears again on the next clock.
rx_fsm : process(clk, rst)
begin
  if rst = '1' then

    rx_state      <= IDLE;
    rx_bit_count  <= 1;
    rx_clk_count  <= CLK_TICKS_PER_BIT;
    rx_shift_reg  <= (others => '0');
    rxd_byte      <= (others => '0');
    got_byte      <= '0';

    rx_data_d     <= '1';
    d_in          <= '1';

  elsif rising_edge(clk) then

    -- Input resynchroniser.
    d_in      <= rx_data_d;
    rx_data_d <= rx_data;

    case rx_state is

      when IDLE =>
        got_byte <= '0';
        if d_in = '0' then
          -- Possible start bit: look again in the middle of the bit.
          rx_clk_count <= CLK_TICKS_PER_BIT/2;
          rx_state     <= START_CHECK;
        end if;

      when START_CHECK =>
        if rx_clk_count /= 1 then
          rx_clk_count <= rx_clk_count - 1;
        elsif d_in = '0' then
          -- Start bit confirmed, first data bit is one bit period away.
          rx_clk_count <= CLK_TICKS_PER_BIT;
          rx_bit_count <= 1;
          rx_state     <= RXING;
        else
          rx_state <= IDLE;
        end if;

      when RXING =>
        if rx_clk_count /= 1 then
          rx_clk_count <= rx_clk_count - 1;
        else
          -- Sample point: new bit enters at bit 0.
          rx_shift_reg <= rx_shift_reg(8 downto 0) & d_in;
          rx_clk_count <= CLK_TICKS_PER_BIT;

          if rx_bit_count = 9 then
            -- Ninth sample is taken mid stop bit.
            rx_state <= END_DELAY;
          else
            rx_bit_count <= rx_bit_count + 1;
          end if;
        end if;

      when others =>  -- END_DELAY (lasts a single clock, no further wait)
        -- First received bit (lsb) sits in rx_shift_reg(8), so reverse.
        for k in rxd_byte'range loop
          rxd_byte(k) <= rx_shift_reg(8 - k);
        end loop;
        got_byte <= '1';
        rx_state <= IDLE;

    end case;

  end if;
end process rx_fsm;


-------------------------------------------------------------------------------
-- Tx process. Very simple output process
-------------------------------------------------------------------------------

--
-- As the serial transmission of the 2 bytes associated with a 16 bit word
-- read can take longer than the next read instruction to arrive we have to
-- buffer up the output data.
--

-- Tx buffer: 8 entry byte FIFO between the master process (tx_fifo_wr /
-- transmit_byte) and the Tx output process (tx_fifo_rd / next_op_byte).
--
-- A write that arrives while the buffer is full (and no read is happening in
-- the same cycle) is dropped and fifo_overflow is set; the flag stays set
-- until reset. Dropping the write keeps the pointers and the fill count
-- consistent, so bytes already queued are still sent in order.
process(clk,rst)
  variable accept_wr : boolean;
begin
if(rst = '1') then
  tx_wr_ptr     <= 0;
  tx_rd_ptr     <= 0;
  tx_fifo_fill  <= 0;
  tx_fifo       <= (others => (others => '0'));
  fifo_overflow <= '0';
elsif(rising_edge(clk)) then

  -- A write can be taken if there is space, or if a read in this same cycle
  -- is freeing a slot (the byte at the read pointer has already been taken
  -- by the Tx process when tx_fifo_rd is seen high here).
  accept_wr := (tx_fifo_wr = '1') and ((tx_fifo_fill < 8) or (tx_fifo_rd = '1'));

  if(accept_wr) then
    tx_fifo(tx_wr_ptr) <= transmit_byte;
    if(tx_wr_ptr = 7) then
      tx_wr_ptr <= 0;
    else
      tx_wr_ptr <= tx_wr_ptr + 1;
    end if;
  elsif(tx_fifo_wr = '1') then
    fifo_overflow <= '1';                           -- Buffer full, byte discarded.
  end if;

  if(tx_fifo_rd = '1') then
    if(tx_rd_ptr = 7) then
      tx_rd_ptr <= 0;
    else
      tx_rd_ptr <= tx_rd_ptr + 1;
    end if;
  end if;

  -- Fill level only changes when exactly one of write/read takes place.
  if(accept_wr and tx_fifo_rd = '0') then
    tx_fifo_fill <= tx_fifo_fill + 1;
  elsif(tx_fifo_wr = '0' and tx_fifo_rd = '1') then
    tx_fifo_fill <= tx_fifo_fill - 1;             -- Presume we won't cause 'underflow'.
  end if;

end if;
end process;

-- Byte at the head of the buffer, presented combinatorially.
next_op_byte <= tx_fifo(tx_rd_ptr);

-- Test loopback.
--tx_fifo_wr <= got_byte;
--transmit_byte <= rxd_byte;


-------------------------------------------------------------------------------
-- The output process. Simply waits until tx_fifo_fill isn't 0 then reads
-- a byte and starts transmitting.
-------------------------------------------------------------------------------

-- Serial transmitter.
--
-- Frame layout in tx_shift_reg (bit 9 is sent first):
--   bit 9      : start bit ('0')
--   bits 8..1  : data, lsb in bit 8 so it goes out first
--   bit 0      : stop bit ('1')
-- tx_data is a registered copy of tx_shift_reg(9) and is held at '1' (line
-- idle) during reset so no spurious start bit is seen by the far end.
process(clk,rst)
begin
if(rst = '1') then
  tx_state         <= IDLE;
  tx_shift_reg     <= (others => '1');
  tx_bit_count     <= 1;
  tx_clk_count     <= 1;
  tx_fifo_rd       <= '0';
  tx_data          <= '1';
elsif(rising_edge(clk)) then

  case tx_state is
    when IDLE =>
      if(tx_fifo_fill /= 0) then
        tx_shift_reg(9)         <= '0'; -- Start bit
        for i in 0 to 7 loop            -- Swap bit order, lsb goes first.
          tx_shift_reg(8 - i) <= next_op_byte(i);
        end loop;
        tx_shift_reg(0)         <= '1'; -- Stop bit
        tx_fifo_rd              <= '1'; -- Pop the byte just loaded.
        tx_bit_count            <= 1;
        tx_clk_count            <= CLK_TICKS_PER_BIT;    -- Set delay to 1 bit period.
        tx_state                <= SENDING;
      else
        tx_shift_reg <= (others => '1');                 -- Line idles high.
      end if;

    when SENDING =>
      tx_fifo_rd <= '0';                         -- De-assert
      if(tx_clk_count = 1) then
        -- End of a bit period: move the next bit up to bit 9, fill with '1'.
        tx_shift_reg(9 downto 1) <= tx_shift_reg(8 downto 0);
        tx_shift_reg(0) <= '1';

        if(tx_bit_count = 10) then                -- Clocked out 10 bit periods (start, 8 data, stop).
          tx_state <= END_WAIT;
          tx_clk_count <= CLK_TICKS_PER_BIT;
          tx_bit_count <= 1;
        else
          tx_bit_count <= tx_bit_count + 1;
          tx_clk_count <= CLK_TICKS_PER_BIT;
        end if;
      else
        tx_clk_count <= tx_clk_count - 1;
      end if;

    when OTHERS => --END_WAIT -- Wait for 4 bit periods between characters
      if(tx_clk_count = 1) then
        if(tx_bit_count = 4) then
          tx_state <= IDLE;
        else
          tx_bit_count <= tx_bit_count + 1;
          tx_clk_count <= CLK_TICKS_PER_BIT;
        end if;
      else
        tx_clk_count <= tx_clk_count - 1;
      end if;

  end case;


  tx_data <= tx_shift_reg(9);

end if;
end process;





end rtl;



