当前位置:   article > 正文

基于 FPGA 的 RISC CPU 设计(2)详细的模块设计思路及其 Verilog 代码_fpga实现cpu

fpga实现cpu

引言

        其实,在一个 CPU 的设计中,各个子模块都是比较基本、比较简单的,只是组合起来之后的整体架构(无论是时序路径,还是数据通路和控制通路)会比较复杂而已。这里主要详细介绍整个微架构的各个子模块。


一、PC 取指、PC 分支、指令跳转与二级堆栈

        PC 取指主要是 PC 值作为地址,在程序存储器(EPROM)中读取指令数据,并发送给指令寄存器 IR。通常情况下,都是逐一读出的,也就是说 PC 值在下一个时钟(流水时钟)自动加一,来读取下一地址所在的指令(当然,PC 的修改量取决于指令字长和编址方式)。然而,有时候会出现程序分支、程序跳转之类的,使得程序需要执行另一个区域所在的指令,于是就出现了 PC 分支。

1.1、对于 PC 分支 Branch,通常分为以下几种情况:

  1. RETLW:返回,将堆栈 stack1 或 stack2(由 stacklevel 选择)赋值给 PC;
  2. CALL :调用,将 status[6:5],1'b0,inst[7:0] 赋值给 PC;
  3. GOTO:跳转,将 status[6:5],inst[8:0] 赋值给 PC;
  4. MOVWF:MOVWF PCL,将 pc[10:8],dbus 赋值给 PC;(另外,还有 ADDWF 和 BSF,这里不加以实现);
  5. DEFAULT:PC <- PC + 1。

1.2、对于指令跳转,能够使指令 Jump 的指令有 GOTO、CALL、RETLW、BTFSC、BTFSS、DECFSZ、INCFSZ。

1.3、对于 stacklevel 的调用与返回,使用了状态机,如下。圆圈里的代表 stacklevel,右边的数字代表已经调用的子程序层数。

  1. 当执行 CALL 指令的时候,进行压栈操作 push,PC 赋值给堆栈,同时改变 stacklevel 的状态;

  2. 当执行 RETLW 指令的时候,进行弹出操作 pop,堆栈返回给 PC,同时改变 stacklevel 的状态。

        详细的电路模块和 Verilog 代码如下:

  // Program counter register.
  // Synchronous, active-low reset loads RESET_VECTOR; otherwise the counter
  // takes whatever the next-PC mux (pc_mux) currently selects.
  always @(posedge clk) begin
    if (rst_n == 1'b0) begin
      pc <= RESET_VECTOR;
    end
    else begin
      pc <= pc_mux;
    end
  end
  // Next-PC selection (combinational).
  // The 14-bit selector is {inst[11:0], stacklevel[1:0]}; X marks a bit that
  // is not compared. Items are tried top to bottom and the first match wins.
  always @(*) begin
    casex ({inst, stacklevel})
      // RETLW: return to the address saved by the most recent CALL.
      14'b1000_XXXX_XXXX_11: pc_mux = stack2;
      14'b1000_XXXX_XXXX_01: pc_mux = stack1;
      // CALL: page bits from STATUS, bit 8 forced to 0, low byte from the opcode.
      14'b1001_XXXX_XXXX_XX: pc_mux = {status[6:5], 1'b0, inst[7:0]};
      // GOTO: page bits from STATUS plus the 9-bit literal.
      14'b101X_XXXX_XXXX_XX: pc_mux = {status[6:5], inst[8:0]};
      // MOVWF PCL: keep the upper PC bits, take the low byte from the data bus.
      // NOTE(review): the wildcard in bit 9 also admits 0010_0010_0010;
      // confirm that this second encoding is meant to write the PC as well.
      14'b00X0_0010_0010_XX: pc_mux = {pc[10:8], dbus};
      // Everything else (including RETLW at any other stacklevel): next address.
      default:               pc_mux = pc + 11'd1;
    endcase
  end
  // Two-entry return-address stack (stack1 = first CALL, stack2 = nested CALL).
  //
  // Synchronous, active-low reset clears both entries. On a CALL instruction
  // (inst[11:8] == 4'b1001) the current pc is saved in the entry selected by
  // stacklevel:
  //   2'b00 -> stack1   (no CALL in progress)
  //   2'b01 -> stack2   (one CALL in progress)
  //
  // For any other stacklevel the stack is already full. The stacklevel state
  // machine ignores such a CALL, so the saved return addresses are left
  // untouched here as well. Clearing them (as the previous default branch did)
  // would make the pending RETLWs return to address 0.
  always @(posedge clk) begin
    if (!rst_n) begin
      stack1 <= 11'd0;
      stack2 <= 11'd0;
    end
    else if (inst[11:8] == 4'b1001) begin
      case (stacklevel)
        2'b00:   stack1 <= pc;
        2'b01:   stack2 <= pc;
        default: begin
          // Stack full: hold both entries.
          stack1 <= stack1;
          stack2 <= stack2;
        end
      endcase
    end
  end
  // Call-depth tracker.
  // stacklevel encodes how many CALLs are outstanding: 00 = none, 01 = one,
  // 11 = two. Only the opcode nibble inst[11:8] and the current level take
  // part in the decision, so the selector is narrowed to those six bits.
  always @(posedge clk) begin
    if (rst_n == 1'b0) begin
      stacklevel <= 2'b00;
    end
    else begin
      casex ({inst[11:8], stacklevel})
        // CALL (opcode 1001): push
        6'b1001_00: stacklevel <= 2'b01; // first CALL recorded
        6'b1001_01: stacklevel <= 2'b11; // second CALL recorded
        6'b1001_11: stacklevel <= 2'b11; // already full: ignored
        // RETLW (opcode 1000): pop
        6'b1000_11: stacklevel <= 2'b01; // back to one CALL in progress
        6'b1000_01: stacklevel <= 2'b00; // back to no CALL in progress
        6'b1000_00: stacklevel <= 2'b00; // nothing to pop: ignored
        // Any other instruction or level: the register keeps its value.
        default:    stacklevel <= stacklevel;
      endcase
    end
  end
  // Instruction register.
  // Cleared to the all-zero opcode (NOP) by the synchronous, active-low reset.
  // When skip is asserted the fetched word is discarded and a NOP is loaded
  // instead; otherwise the word read from program memory (inst_data) is latched.
  always @(posedge clk) begin
    if (rst_n == 1'b0)
      inst <= 12'h000;
    else if (skip == 1'b1)
      inst <= 12'h000;      // forced NOP
    else
      inst <= inst_data;
  end
  // Skip decode (combinational).
  // skip is raised when the instruction that follows the current one must be
  // replaced by a NOP. Only inst[11:6] and the ALU zero flag are examined, so
  // the selector is {inst[11:6], aluz}; X marks a bit that is not compared.
  always @(*) begin
    casex ({inst[11:6], aluz})
      7'b10XX_XX_X: skip = 1'b1; // GOTO, CALL or RETLW
      7'b0110_XX_1: skip = 1'b1; // BTFSC and aluz == 1
      7'b0111_XX_0: skip = 1'b1; // BTFSS and aluz == 0
      7'b0010_11_1: skip = 1'b1; // DECFSZ and aluz == 1
      7'b0011_11_1: skip = 1'b1; // INCFSZ and aluz == 1
      default:      skip = 1'b0;
    endcase
  end

二、指令译码

        主要是通过组合逻辑硬件电路(Look Up Table 的形式)来实现该指令译码,针对指令提供关键的控制、状态信号,具体译码方式参考如下代码。

        aluasel、alubsel:主要是对 ALU 模块的操作数进行选择,操作数一般来自 W 寄存器、F 文件寄存器和指令立即数;

        aluop:主要是对 ALU 模块的操作进行选择,如加、减、与、或、非、异或、左移、右移、半字节交换;

        wwe、fwe:主要是 W 和 F 寄存器的写使能;

        zwe、cwe:主要是对 STATUS 寄存器的 Z 和 C 状态位的写使能;

        bdpol:与面向位操作类指令有关;

        tris:控制 I/O 的输入输出状态(无);

        option:OPTION 寄存器(无)。

        详细的电路模块和 Verilog 代码如下:

  // Instruction decoder: a purely combinational look-up from the 12-bit opcode
  // to the control signals of the datapath.
  //
  //   inst     12-bit instruction word
  //   aluasel  ALU A operand select   00=W, 01=SBUS, 10=K, 11=BD
  //   alubsel  ALU B operand select   00=W, 01=SBUS, 10=K, 11=constant 1
  //   aluop    ALU operation code
  //   wwe      W register write enable
  //   fwe      file register write enable
  //   zwe      STATUS Z bit write enable
  //   cwe      STATUS C bit write enable
  //   bdpol    bit-decode polarity (0 = no inversion, 1 = invert)
  //   option   instruction is OPTION
  //   tris     instruction is TRIS
  module IDec (
    inst,
    aluasel,
    alubsel,
    aluop,
    wwe,
    fwe,
    zwe,
    cwe,
    bdpol,
    option,
    tris
  );
    input  [11:0] inst;
    output [1:0]  aluasel;
    output [1:0]  alubsel;
    output [3:0]  aluop;
    output        wwe;
    output        fwe;
    output        zwe;
    output        cwe;
    output        bdpol;
    output        option;
    output        tris;

    // One packed word per instruction; unpacked into the outputs below.
    reg [14:0] decodes;

    assign {aluasel, // Select source for ALU A input. 00=W, 01=SBUS, 10=K, 11=BD
            alubsel, // Select source for ALU B input. 00=W, 01=SBUS, 10=K, 11="1"
            aluop,   // ALU operation: 0000=ADD 1000=SUB 0001=AND 0010=OR 0011=XOR
                     //                0100=COM 0101=ROR 0110=ROL 0111=SWAP
            wwe,     // W register Write Enable
            fwe,     // File Register Write Enable
            zwe,     // Status register Z bit update
            cwe,     // Status register C bit update
            bdpol,   // Polarity on bit decode vector (0=no inversion, 1=invert)
            tris,    // Instruction is a TRIS instruction
            option   // Instruction is an OPTION instruction
           } = decodes;

    // casez (not casex): only the '?' positions of each pattern are
    // don't-cares. An unknown (X) bit in inst no longer matches an arbitrary
    // entry; it falls through to the all-zero default, so no write enable is
    // asserted for an unknown opcode.
    always @(*) begin
      casez (inst)
        // *** Byte-Oriented File Register Operations
        //
        //                                     A  A  ALU  W F Z C B T O
        //                                     L  L  O    W W W W D R P
        //                                     U  U  P    E E E E P I T
        //                                     A  B             O S
        //                                                      L
        12'b0000_0000_0000: decodes = 15'b00_00_0000_0_0_0_0_0_0_0; // NOP
        12'b0000_001?_????: decodes = 15'b00_00_0010_0_1_0_0_0_0_0; // MOVWF
        12'b0000_0100_0000: decodes = 15'b00_00_0011_1_0_1_0_0_0_0; // CLRW
        12'b0000_011?_????: decodes = 15'b00_00_0011_0_1_1_0_0_0_0; // CLRF
        12'b0000_100?_????: decodes = 15'b01_00_1000_1_0_1_1_0_0_0; // SUBWF (d=0)
        12'b0000_101?_????: decodes = 15'b01_00_1000_0_1_1_1_0_0_0; // SUBWF (d=1)
        12'b0000_110?_????: decodes = 15'b01_11_1000_1_0_1_0_0_0_0; // DECF (d=0)
        12'b0000_111?_????: decodes = 15'b01_11_1000_0_1_1_0_0_0_0; // DECF (d=1)
        12'b0001_000?_????: decodes = 15'b00_01_0010_1_0_1_0_0_0_0; // IORWF (d=0)
        12'b0001_001?_????: decodes = 15'b00_01_0010_0_1_1_0_0_0_0; // IORWF (d=1)
        12'b0001_010?_????: decodes = 15'b00_01_0001_1_0_1_0_0_0_0; // ANDWF (d=0)
        12'b0001_011?_????: decodes = 15'b00_01_0001_0_1_1_0_0_0_0; // ANDWF (d=1)
        12'b0001_100?_????: decodes = 15'b00_01_0011_1_0_1_0_0_0_0; // XORWF (d=0)
        12'b0001_101?_????: decodes = 15'b00_01_0011_0_1_1_0_0_0_0; // XORWF (d=1)
        12'b0001_110?_????: decodes = 15'b00_01_0000_1_0_1_1_0_0_0; // ADDWF (d=0)
        12'b0001_111?_????: decodes = 15'b00_01_0000_0_1_1_1_0_0_0; // ADDWF (d=1)
        12'b0010_000?_????: decodes = 15'b01_01_0010_1_0_1_0_0_0_0; // MOVF (d=0)
        12'b0010_001?_????: decodes = 15'b01_01_0010_0_1_1_0_0_0_0; // MOVF (d=1)
        12'b0010_010?_????: decodes = 15'b01_01_0100_1_0_1_0_0_0_0; // COMF (d=0)
        12'b0010_011?_????: decodes = 15'b01_01_0100_0_1_1_0_0_0_0; // COMF (d=1)
        12'b0010_100?_????: decodes = 15'b01_11_0000_1_0_1_0_0_0_0; // INCF (d=0)
        12'b0010_101?_????: decodes = 15'b01_11_0000_0_1_1_0_0_0_0; // INCF (d=1)
        12'b0010_110?_????: decodes = 15'b01_11_1000_1_0_0_0_0_0_0; // DECFSZ(d=0)
        12'b0010_111?_????: decodes = 15'b01_11_1000_0_1_0_0_0_0_0; // DECFSZ(d=1)
        12'b0011_000?_????: decodes = 15'b01_01_0101_1_0_0_1_0_0_0; // RRF (d=0)
        12'b0011_001?_????: decodes = 15'b01_01_0101_0_1_0_1_0_0_0; // RRF (d=1)
        12'b0011_010?_????: decodes = 15'b01_01_0110_1_0_0_1_0_0_0; // RLF (d=0)
        12'b0011_011?_????: decodes = 15'b01_01_0110_0_1_0_1_0_0_0; // RLF (d=1)
        12'b0011_100?_????: decodes = 15'b01_01_0111_1_0_0_0_0_0_0; // SWAPF (d=0)
        12'b0011_101?_????: decodes = 15'b01_01_0111_0_1_0_0_0_0_0; // SWAPF (d=1)
        12'b0011_110?_????: decodes = 15'b01_11_0000_1_0_0_0_0_0_0; // INCFSZ(d=0)
        12'b0011_111?_????: decodes = 15'b01_11_0000_0_1_0_0_0_0_0; // INCFSZ(d=1)
        // *** Bit-Oriented File Register Operations
        12'b0100_????_????: decodes = 15'b11_01_0001_0_1_0_0_1_0_0; // BCF
        12'b0101_????_????: decodes = 15'b11_01_0010_0_1_0_0_0_0_0; // BSF
        12'b0110_????_????: decodes = 15'b11_01_0001_0_0_0_0_0_0_0; // BTFSC
        12'b0111_????_????: decodes = 15'b11_01_0001_0_0_0_0_0_0_0; // BTFSS
        // *** Literal and Control Operations
        12'b0000_0000_0010: decodes = 15'b00_00_0010_0_1_0_0_0_0_1; // OPTION
        12'b0000_0000_0011: decodes = 15'b00_00_0000_0_0_0_0_0_0_0; // SLEEP
        12'b0000_0000_0100: decodes = 15'b00_00_0000_0_0_0_0_0_0_0; // CLRWDT
        12'b0000_0000_0101: decodes = 15'b00_00_0010_0_1_0_0_0_1_0; // TRIS 5
        12'b0000_0000_0110: decodes = 15'b00_00_0010_0_1_0_0_0_1_0; // TRIS 6
        12'b0000_0000_0111: decodes = 15'b00_00_0010_0_1_0_0_0_1_0; // TRIS 7
        12'b1000_????_????: decodes = 15'b10_10_0010_1_0_0_0_0_0_0; // RETLW
        12'b1001_????_????: decodes = 15'b10_10_0010_0_0_0_0_0_0_0; // CALL
        12'b101?_????_????: decodes = 15'b10_10_0010_0_0_0_0_0_0_0; // GOTO
        12'b1100_????_????: decodes = 15'b10_10_0010_1_0_0_0_0_0_0; // MOVLW
        12'b1101_????_????: decodes = 15'b00_10_0010_1_0_1_0_0_0_0; // IORLW
        12'b1110_????_????: decodes = 15'b00_10_0001_1_0_1_0_0_0_0; // ANDLW
        12'b1111_????_????: decodes = 15'b00_10_0011_1_0_1_0_0_0_0; // XORLW
        // Unassigned or unknown opcode: behave like NOP.
        default:            decodes = 15'b00_00_0000_0_0_0_0_0_0_0;
      endcase
    end
  endmodule

三、指令执行

        该 ALU 模块基本上是能够执行所有的指令操作的,可能不是最优的,但是却是完备的。

        alua、alub:操作数,通过选择操作数,如 W 寄存器、F 寄存器 sbus、常数 K、位操作数 bd、以及常数 1,来进行对应指令的数据操作;

        aluop:操作码,有加、减、与、或、非、异或、左移、右移、半字节交换九种算术逻辑操作;

        cin:进位输入,循环右移 RRF 时移入结果的最高位,循环左移 RLF 时移入结果的最低位;

        aluout:运算结果,作为 ALU 模块的输出,输出到数据总线中,并最终选择保存到 W 寄存器还是 F 寄存器;如 aluout -> W or aluout -> dbus -> regfilein --> regfileout ...> sbus

        zout、cout:标志位,ALU 操作可能引起的状态位的改变。

        详细的电路模块和 Verilog 代码如下:

  // 8-bit arithmetic/logic unit (purely combinational).
  //
  //   alua, alub  operands
  //   aluop       operation select (see the ALUOP_* parameters)
  //   cin         bit shifted in by the two rotate operations
  //   aluout      8-bit result
  //   zout        1 when aluout is zero
  //   cout        carry out; for ALUOP_SUB it is the inverted borrow
  module ALU (
    input      [7:0] alua,
    input      [7:0] alub,
    input      [3:0] aluop,
    input            cin,
    output reg [7:0] aluout,
    output reg       zout,
    output reg       cout
  );
    parameter ALUOP_ADD  = 4'b0000;
    parameter ALUOP_SUB  = 4'b1000;
    parameter ALUOP_AND  = 4'b0001;
    parameter ALUOP_OR   = 4'b0010;
    parameter ALUOP_XOR  = 4'b0011;
    parameter ALUOP_COM  = 4'b0100;
    parameter ALUOP_ROR  = 4'b0101;
    parameter ALUOP_ROL  = 4'b0110;
    parameter ALUOP_SWAP = 4'b0111;

    // Bit 8 is the raw carry/borrow/shifted-out bit, bits 7:0 the result.
    reg [8:0] wide;

    always @(*) begin
      case (aluop)
        ALUOP_ADD:  wide = alua + alub;                     // 9-bit sum keeps the carry
        ALUOP_SUB:  wide = alua - alub;                     // bit 8 set on borrow
        ALUOP_AND:  wide = {1'b0, alua & alub};
        ALUOP_OR:   wide = {1'b0, alua | alub};
        ALUOP_XOR:  wide = {1'b0, alua ^ alub};
        ALUOP_COM:  wide = {1'b0, ~alua};
        ALUOP_ROR:  wide = {alua[0], cin, alua[7:1]};       // cin enters at bit 7
        ALUOP_ROL:  wide = {alua, cin};                     // cin enters at bit 0
        ALUOP_SWAP: wide = {1'b0, alua[3:0], alua[7:4]};    // exchange nibbles
        default:    wide = 9'h000;
      endcase

      aluout = wide[7:0];
      zout   = (wide[7:0] == 8'h00);

      // Subtraction reports "no borrow" as carry = 1.
      cout = wide[8];
      if (aluop == ALUOP_SUB)
        cout = ~wide[8];
    end
  endmodule
  // ALU operand A multiplexer (combinational), steered by aluasel.
  always @(*) begin
    case (aluasel)
      2'b11: alua = bd;    // bit-decode vector
      2'b10: alua = k;     // literal from the instruction
      2'b01: alua = sbus;  // file register read bus
      2'b00: alua = w;     // W register
    endcase
  end
  // ALU operand B multiplexer (combinational), steered by alubsel.
  always @(*) begin
    case (alubsel)
      2'b11: alub = 8'b00000001;  // constant one
      2'b10: alub = k;            // literal from the instruction
      2'b01: alub = sbus;         // file register read bus
      2'b00: alub = w;            // W register
    endcase
  end

四、直接访存、间接访存、相对访存

        直接访存就是指令中存在着寄存器或者存储器的地址;

        间接访存就是通过访问寄存器,然后寄存器中存在着寄存器或者存储器的地址;例如 INDF Register,是一个全局寄存器,在所有的 Bank 中都有映射,而无需考虑 Bank 的设定,它本身不代表地址,而是代表间接地址所指向的单元;

        相对访存就是页面访存,通过扩展页,来提高存储的容量,通过对页地址进行选择(在 STATUS 的 PA1 和 PA0 中),作为 PC 值高位,来实现访存的一种方式。

        通过对指令地址的判定,输出对应的控制信号,选择对应的寄存器进行读写。

        详细的电路模块和 Verilog 代码如下:

  // Effective file-register address (combinational).
  // Direct access:   {fsr[6:5], fsel} - bank bits from FSR, offset from the opcode.
  // Indirect access: when the opcode names INDF_ADDRESS, the address held in
  //                  fsr[6:0] is used instead.
  always @(*) begin
    fileaddr = {fsr[6:5], fsel};      // direct
    if (fsel == INDF_ADDRESS)
      fileaddr = fsr[6:0];            // indirect
  end
  // Address-space decode (combinational).
  // Addresses whose bits [4:3] are 00 select the special registers; every
  // other address selects the register file. Exactly one of the two selects
  // is driven high.
  always @(*) begin
    casex (fileaddr[4:3])
      2'b00:   {specialsel, regfilesel} = 2'b10;
      default: {specialsel, regfilesel} = 2'b01;
    endcase
  end
  // File-register read bus (combinational).
  //
  // specialsel is decoded from the effective address fileaddr, so the special
  // register must be picked with fileaddr[2:0] as well. For a direct access
  // fileaddr[2:0] equals fsel[2:0], so nothing changes. For an indirect access
  // fsel is the INDF address, and indexing with fsel[2:0] would always return
  // fsr instead of the register that fsr points to.
  always @(*) begin
    if (specialsel) begin
      case (fileaddr[2:0])
        3'h0: sbus = fsr;
        3'h1: sbus = tmr0;
        3'h2: sbus = pc[7:0];
        3'h3: sbus = status;
        3'h4: sbus = fsr;
        3'h5: sbus = porta; // PORTA is an input-only port
        3'h6: sbus = portb; // PORTB is an output-only port
        3'h7: sbus = portc; // PORTC is an output-only port
      endcase
    end
    else begin
      if (regfilesel)
        sbus = regfileout;  // general-purpose register file
      else
        sbus = 8'h00;       // nothing selected
    end
  end

五、F 寄存器和 W 寄存器

        F 寄存器分为特殊寄存器和通用寄存器,特殊寄存器是作为一个单独的寄存器进行存放,和 W 寄存器一样,通用寄存器是以 RAM 的形式存在。它们的读写延时为写入数据需要一个时钟,读出数据不需要时钟。 

        关键的电路模块和 Verilog 代码如下:

  1. `define DEBUG_SHOWREADS
  2. `define DEBUG_SHOWWRITES
  // General-purpose register file: maps a (bank, location) pair onto a flat
  // address and forwards the access to the dram instance.
  //
  //   clk       write clock, passed to the memory
  //   rst_n     declared but not used inside this module
  //   we        write enable, passed to the memory
  //   re        declared but not used inside this module
  //   bank      2-bit bank select
  //   location  5-bit offset inside the bank
  //   din/dout  write data / read data
  //
  // The flat address is {block, location[2:0]}: eight bytes per block.
  // Block 0 is shared by every bank (location 01xxx); each bank then owns two
  // private blocks (locations 10xxx and 11xxx).
  module regs (
    input        clk,
    input        rst_n,
    input        we,
    input        re,
    input  [1:0] bank,
    input  [4:0] location,
    input  [7:0] din,
    output [7:0] dout
  );
    reg [6:0] final_address;
    reg [3:0] block;          // 8-byte block number inside the memory

    dram dram (
      .clk     (clk),
      .address (final_address),
      .we      (we),
      .din     (din),
      .dout    (dout)
    );

    always @(*) begin
      casex ({bank, location[4:3]})
        4'bXX_01: block = 4'b0000;  // shared by all banks
        // Bank #0
        4'b00_10: block = 4'b0001;
        4'b00_11: block = 4'b0010;
        // Bank #1
        4'b01_10: block = 4'b0011;
        4'b01_11: block = 4'b0100;
        // Bank #2
        4'b10_10: block = 4'b0101;
        4'b10_11: block = 4'b0110;
        // Bank #3
        4'b11_10: block = 4'b0111;
        4'b11_11: block = 4'b1000;
        // location 00xxx: falls back to block 0
        default:  block = 4'b0000;
      endcase
      final_address = {block, location[2:0]};
    end
  endmodule
  // Small byte-wide memory: synchronous write, asynchronous read.
  //
  //   clk      write clock
  //   address  7-bit word address
  //   we       write enable, sampled on the rising clock edge
  //   din      write data
  //   dout     contents of mem[address], available without a clock
  //
  // word_depth sets the number of 8-bit words (72 by default).
  module dram (
    input        clk,
    input  [6:0] address,
    input        we,
    input  [7:0] din,
    output [7:0] dout
  );
    parameter word_depth = 72;

    reg [7:0] mem [0:word_depth-1];

    // Write port
    always @(posedge clk) begin
      if (we == 1'b1) begin
        mem[address] <= din;
      end
    end

    // Read port
    assign dout = mem[address];
  endmodule

        最近比较忙,时间比较赶,关键模块差不多就是这些。当然,还有存储映射、特殊寄存器的写入、TMR0 预分频、测试程序的编写之类的内容,这里就没有给出;整个工程的获取方式见下文“六、关于工程”。

        希望大家能够通过学习较为简单的 RISC CPU 设计,来提高自己的 FPGA 设计水准,那么本文的目的也就达到了~


六、关于工程

        关注“新芯设计”公众号,发送 RISC CPU 即可获取。

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Cpp五条/article/detail/628639
推荐阅读
相关标签
  

闽ICP备14008679号