From 3c7beb03c6d233eccd1e81134d175c6f0767b392 Mon Sep 17 00:00:00 2001 From: Liphen Date: Tue, 7 Nov 2023 17:58:40 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0pua-mips=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- chisel/Makefile | 3 +- chisel/playground/resources/mycpu_top.v | 106 ++++ .../resources/mycpu_top_for_soc_simulator.v | 132 +++++ chisel/playground/src/Core.scala | 194 ++++++++ chisel/playground/src/CpuConfig.scala | 48 ++ chisel/playground/src/DecoupledGCD.scala | 69 --- chisel/playground/src/Elaborate.scala | 4 +- chisel/playground/src/GCD.scala | 29 -- chisel/playground/src/PuaMips.scala | 32 ++ chisel/playground/src/axi/FIFO.scala | 119 +++++ chisel/playground/src/cache/Cache.scala | 37 ++ .../src/cache/CacheAXIInterface.scala | 80 +++ chisel/playground/src/cache/DCache.scala | 458 ++++++++++++++++++ chisel/playground/src/cache/ICache.scala | 249 ++++++++++ .../playground/src/cache/memory/LUTRam.scala | 65 +++ .../src/cache/memory/LUTRamIP.scala | 65 +++ .../src/cache/memory/PortDefinitions.scala | 37 ++ .../src/cache/memory/SimpleDualPortRam.scala | 90 ++++ .../cache/memory/SimpleDualPortRamIP.scala | 68 +++ chisel/playground/src/ctrl/Ctrl.scala | 48 ++ chisel/playground/src/defines/Bundles.scala | 333 +++++++++++++ chisel/playground/src/defines/Const.scala | 254 ++++++++++ .../playground/src/defines/Cp0Bundles.scala | 128 +++++ .../playground/src/defines/Instructions.scala | 227 +++++++++ .../src/defines/StaticBundles.scala | 44 ++ chisel/playground/src/defines/Util.scala | 56 +++ chisel/playground/src/mmu/TlbL1D.scala | 70 +++ chisel/playground/src/mmu/TlbL1I.scala | 59 +++ chisel/playground/src/mmu/TlbL2.scala | 69 +++ .../src/pipeline/decoder/ARegfile.scala | 72 +++ .../src/pipeline/decoder/Decoder.scala | 194 ++++++++ .../src/pipeline/decoder/DecoderUnit.scala | 211 ++++++++ .../src/pipeline/decoder/ForwardCtrl.scala | 64 +++ .../src/pipeline/decoder/Issue.scala | 62 +++ 
.../src/pipeline/decoder/JumpCtrl.scala | 43 ++ .../playground/src/pipeline/execute/ALU.scala | 135 ++++++ .../src/pipeline/execute/BranchCtrl.scala | 35 ++ .../playground/src/pipeline/execute/Cp0.scala | 453 +++++++++++++++++ .../playground/src/pipeline/execute/Div.scala | 160 ++++++ .../pipeline/execute/ExeAccessMemCtrl.scala | 99 ++++ .../src/pipeline/execute/ExecuteStage.scala | 66 +++ .../src/pipeline/execute/ExecuteUnit.scala | 151 ++++++ .../playground/src/pipeline/execute/Fu.scala | 118 +++++ .../src/pipeline/execute/HiLo.scala | 22 + .../src/pipeline/execute/LLbit.scala | 23 + .../playground/src/pipeline/execute/Mul.scala | 225 +++++++++ .../pipeline/fetch/BranchPredictorUnit.scala | 199 ++++++++ .../src/pipeline/fetch/FetchUnit.scala | 57 +++ .../src/pipeline/fetch/InstFifo.scala | 141 ++++++ .../src/pipeline/fetch/PreDecoder.scala | 94 ++++ .../pipeline/memory/DataMemoryAccess.scala | 168 +++++++ .../src/pipeline/memory/MemoryStage.scala | 64 +++ .../src/pipeline/memory/MemoryUnit.scala | 122 +++++ .../src/pipeline/writeback/CommitBuffer.scala | 99 ++++ .../pipeline/writeback/WriteBackStage.scala | 46 ++ .../pipeline/writeback/WriteBackUnit.scala | 78 +++ chisel/playground/test/src/GCDSpec.scala | 48 -- chisel/playground/test/src/test.scala | 18 + 58 files changed, 6291 insertions(+), 149 deletions(-) create mode 100644 chisel/playground/resources/mycpu_top.v create mode 100644 chisel/playground/resources/mycpu_top_for_soc_simulator.v create mode 100644 chisel/playground/src/Core.scala create mode 100644 chisel/playground/src/CpuConfig.scala delete mode 100644 chisel/playground/src/DecoupledGCD.scala delete mode 100644 chisel/playground/src/GCD.scala create mode 100644 chisel/playground/src/PuaMips.scala create mode 100644 chisel/playground/src/axi/FIFO.scala create mode 100644 chisel/playground/src/cache/Cache.scala create mode 100644 chisel/playground/src/cache/CacheAXIInterface.scala create mode 100644 chisel/playground/src/cache/DCache.scala create 
mode 100644 chisel/playground/src/cache/ICache.scala create mode 100644 chisel/playground/src/cache/memory/LUTRam.scala create mode 100644 chisel/playground/src/cache/memory/LUTRamIP.scala create mode 100644 chisel/playground/src/cache/memory/PortDefinitions.scala create mode 100644 chisel/playground/src/cache/memory/SimpleDualPortRam.scala create mode 100644 chisel/playground/src/cache/memory/SimpleDualPortRamIP.scala create mode 100644 chisel/playground/src/ctrl/Ctrl.scala create mode 100644 chisel/playground/src/defines/Bundles.scala create mode 100644 chisel/playground/src/defines/Const.scala create mode 100644 chisel/playground/src/defines/Cp0Bundles.scala create mode 100644 chisel/playground/src/defines/Instructions.scala create mode 100644 chisel/playground/src/defines/StaticBundles.scala create mode 100644 chisel/playground/src/defines/Util.scala create mode 100644 chisel/playground/src/mmu/TlbL1D.scala create mode 100644 chisel/playground/src/mmu/TlbL1I.scala create mode 100644 chisel/playground/src/mmu/TlbL2.scala create mode 100644 chisel/playground/src/pipeline/decoder/ARegfile.scala create mode 100644 chisel/playground/src/pipeline/decoder/Decoder.scala create mode 100644 chisel/playground/src/pipeline/decoder/DecoderUnit.scala create mode 100644 chisel/playground/src/pipeline/decoder/ForwardCtrl.scala create mode 100644 chisel/playground/src/pipeline/decoder/Issue.scala create mode 100644 chisel/playground/src/pipeline/decoder/JumpCtrl.scala create mode 100644 chisel/playground/src/pipeline/execute/ALU.scala create mode 100644 chisel/playground/src/pipeline/execute/BranchCtrl.scala create mode 100644 chisel/playground/src/pipeline/execute/Cp0.scala create mode 100644 chisel/playground/src/pipeline/execute/Div.scala create mode 100644 chisel/playground/src/pipeline/execute/ExeAccessMemCtrl.scala create mode 100644 chisel/playground/src/pipeline/execute/ExecuteStage.scala create mode 100644 chisel/playground/src/pipeline/execute/ExecuteUnit.scala create 
mode 100644 chisel/playground/src/pipeline/execute/Fu.scala create mode 100644 chisel/playground/src/pipeline/execute/HiLo.scala create mode 100644 chisel/playground/src/pipeline/execute/LLbit.scala create mode 100644 chisel/playground/src/pipeline/execute/Mul.scala create mode 100644 chisel/playground/src/pipeline/fetch/BranchPredictorUnit.scala create mode 100644 chisel/playground/src/pipeline/fetch/FetchUnit.scala create mode 100644 chisel/playground/src/pipeline/fetch/InstFifo.scala create mode 100644 chisel/playground/src/pipeline/fetch/PreDecoder.scala create mode 100644 chisel/playground/src/pipeline/memory/DataMemoryAccess.scala create mode 100644 chisel/playground/src/pipeline/memory/MemoryStage.scala create mode 100644 chisel/playground/src/pipeline/memory/MemoryUnit.scala create mode 100644 chisel/playground/src/pipeline/writeback/CommitBuffer.scala create mode 100644 chisel/playground/src/pipeline/writeback/WriteBackStage.scala create mode 100644 chisel/playground/src/pipeline/writeback/WriteBackUnit.scala delete mode 100644 chisel/playground/test/src/GCDSpec.scala create mode 100644 chisel/playground/test/src/test.scala diff --git a/chisel/Makefile b/chisel/Makefile index 84af3a0..6655f60 100644 --- a/chisel/Makefile +++ b/chisel/Makefile @@ -6,7 +6,7 @@ test: mill -i __.test verilog: - $(call git_commit, "generate verilog") + $(MAKE) clean mkdir -p $(BUILD_DIR) mill -i __.test.runMain Elaborate -td $(BUILD_DIR) @@ -31,7 +31,6 @@ clean: .PHONY: test verilog help compile bsp reformat checkformat clean sim: - $(call git_commit, "sim RTL") # DO NOT REMOVE THIS LINE!!! @echo "Write this Makefile by yourself." 
-include ../Makefile diff --git a/chisel/playground/resources/mycpu_top.v b/chisel/playground/resources/mycpu_top.v new file mode 100644 index 0000000..6e1637f --- /dev/null +++ b/chisel/playground/resources/mycpu_top.v @@ -0,0 +1,106 @@ +module mycpu_top ( + input [ 5:0] ext_int, + input aclk, + input aresetn, + //axi interface + + //read request + output [ 3:0] arid, + output [31:0] araddr, + output [ 7:0] arlen, + output [ 2:0] arsize, + output [ 1:0] arburst, + output [ 1:0] arlock, + output [ 3:0] arcache, + output [ 2:0] arprot, + output arvalid, + input arready, + + //read response + input [ 3:0] rid, + input [31:0] rdata, + input [ 1:0] rresp, + input rlast, + input rvalid, + output rready, + + //write request + output [ 3:0] awid, + output [31:0] awaddr, + output [ 7:0] awlen, + output [ 2:0] awsize, + output [ 1:0] awburst, + output [ 1:0] awlock, + output [ 3:0] awcache, + output [ 2:0] awprot, + output awvalid, + input awready, + + //write data + output [ 3:0] wid, + output [31:0] wdata, + output [ 3:0] wstrb, + output wlast, + output wvalid, + input wready, + + //write response + input [ 3:0] bid, + input [ 1:0] bresp, + input bvalid, + output bready, + + // trace debug interface + output [31:0] debug_wb_pc, + output [ 3:0] debug_wb_rf_wen, + output [ 4:0] debug_wb_rf_wnum, + output [31:0] debug_wb_rf_wdata +); + +PuaMips puamips( + .clock (aclk ), + .reset (~aresetn ), + .io_ext_int (ext_int ), + .io_axi_ar_bits_id (arid ), + .io_axi_ar_bits_addr (araddr ), + .io_axi_ar_bits_len (arlen ), + .io_axi_ar_bits_size (arsize ), + .io_axi_ar_bits_burst (arburst ), + .io_axi_ar_bits_lock (arlock ), + .io_axi_ar_bits_cache (arcache ), + .io_axi_ar_bits_prot (arprot ), + .io_axi_ar_valid (arvalid ), + .io_axi_ar_ready (arready ), + .io_axi_r_bits_id (rid ), + .io_axi_r_bits_data (rdata ), + .io_axi_r_bits_resp (rresp ), + .io_axi_r_bits_last (rlast ), + .io_axi_r_valid (rvalid ), + .io_axi_r_ready (rready ), + .io_axi_aw_bits_id (awid ), + .io_axi_aw_bits_addr 
(awaddr ), + .io_axi_aw_bits_len (awlen ), + .io_axi_aw_bits_size (awsize ), + .io_axi_aw_bits_burst (awburst ), + .io_axi_aw_bits_lock (awlock ), + .io_axi_aw_bits_cache (awcache ), + .io_axi_aw_bits_prot (awprot ), + .io_axi_aw_valid (awvalid ), + .io_axi_aw_ready (awready ), + .io_axi_w_bits_id (wid ), + .io_axi_w_bits_data (wdata ), + .io_axi_w_bits_strb (wstrb ), + .io_axi_w_bits_last (wlast ), + .io_axi_w_valid (wvalid ), + .io_axi_w_ready (wready ), + .io_axi_b_bits_id (bid ), + .io_axi_b_bits_resp (bresp ), + .io_axi_b_valid (bvalid ), + .io_axi_b_ready (bready ), + .io_debug_wb_pc (debug_wb_pc ), + .io_debug_wb_rf_wen (debug_wb_rf_wen ), + .io_debug_wb_rf_wnum (debug_wb_rf_wnum ), + .io_debug_wb_rf_wdata (debug_wb_rf_wdata) +); + +endmodule \ No newline at end of file diff --git a/chisel/playground/resources/mycpu_top_for_soc_simulator.v b/chisel/playground/resources/mycpu_top_for_soc_simulator.v new file mode 100644 index 0000000..052c5ff --- /dev/null +++ b/chisel/playground/resources/mycpu_top_for_soc_simulator.v @@ -0,0 +1,132 @@ +module mycpu_top ( + input [ 5:0] ext_int, + input aclk, + input aresetn, + //axi interface + + //read request + output [ 3:0] arid, + output [31:0] araddr, + output [ 7:0] arlen, + output [ 2:0] arsize, + output [ 1:0] arburst, + output [ 1:0] arlock, + output [ 3:0] arcache, + output [ 2:0] arprot, + output arvalid, + input arready, + + //read response + input [ 3:0] rid, + input [31:0] rdata, + input [ 1:0] rresp, + input rlast, + input rvalid, + output rready, + + //write request + output [ 3:0] awid, + output [31:0] awaddr, + output [ 7:0] awlen, + output [ 2:0] awsize, + output [ 1:0] awburst, + output [ 1:0] awlock, + output [ 3:0] awcache, + output [ 2:0] awprot, + output awvalid, + input awready, + + //write data + output [ 3:0] wid, + output [31:0] wdata, + output [ 3:0] wstrb, + output wlast, + output wvalid, + input wready, + + //write response + input [ 3:0] bid, + input [ 1:0] bresp, + input bvalid, + output 
bready, + + // trace debug interface + output [31:0] debug_wb_pc, + output [ 3:0] debug_wb_rf_wen, + output [ 4:0] debug_wb_rf_wnum, + output [31:0] debug_wb_rf_wdata, + // for soc-simulator + output [31:0] statistic_cpu_soc_cp0_count, + output [31:0] statistic_cpu_soc_cp0_random, + output [31:0] statistic_cpu_soc_cp0_cause, + output statistic_cpu_soc_int, + output statistic_cpu_soc_commit, + + // bpu statistic + output [31:0] statistic_cpu_bpu_branch, + output [31:0] statistic_cpu_bpu_success, + + // cache statistic + output [31:0] statistic_cache_icache_request, + output [31:0] statistic_cache_icache_hit, + output [31:0] statistic_cache_dcache_request, + output [31:0] statistic_cache_dcache_hit +); + +PuaMips puamips( + .clock (aclk ), + .reset (~aresetn ), + .io_ext_int (ext_int ), + .io_axi_ar_bits_id (arid ), + .io_axi_ar_bits_addr (araddr ), + .io_axi_ar_bits_len (arlen ), + .io_axi_ar_bits_size (arsize ), + .io_axi_ar_bits_burst (arburst ), + .io_axi_ar_bits_lock (arlock ), + .io_axi_ar_bits_cache (arcache ), + .io_axi_ar_bits_prot (arprot ), + .io_axi_ar_valid (arvalid ), + .io_axi_ar_ready (arready ), + .io_axi_r_bits_id (rid ), + .io_axi_r_bits_data (rdata ), + .io_axi_r_bits_resp (rresp ), + .io_axi_r_bits_last (rlast ), + .io_axi_r_valid (rvalid ), + .io_axi_r_ready (rready ), + .io_axi_aw_bits_id (awid ), + .io_axi_aw_bits_addr (awaddr ), + .io_axi_aw_bits_len (awlen ), + .io_axi_aw_bits_size (awsize ), + .io_axi_aw_bits_burst (awburst ), + .io_axi_aw_bits_lock (awlock ), + .io_axi_aw_bits_cache (awcache ), + .io_axi_aw_bits_prot (awprot ), + .io_axi_aw_valid (awvalid ), + .io_axi_aw_ready (awready ), + .io_axi_w_bits_id (wid ), + .io_axi_w_bits_data (wdata ), + .io_axi_w_bits_strb (wstrb ), + .io_axi_w_bits_last (wlast ), + .io_axi_w_valid (wvalid ), + .io_axi_w_ready (wready ), + .io_axi_b_bits_id (bid ), + .io_axi_b_bits_resp (bresp ), + .io_axi_b_valid (bvalid ), + .io_axi_b_ready (bready ), + .io_debug_wb_pc (debug_wb_pc ), + .io_debug_wb_rf_wen 
(debug_wb_rf_wen ), + .io_debug_wb_rf_wnum (debug_wb_rf_wnum ), + .io_debug_wb_rf_wdata (debug_wb_rf_wdata ), + .io_statistic_cpu_soc_cp0_count (statistic_cpu_soc_cp0_count ), + .io_statistic_cpu_soc_cp0_random (statistic_cpu_soc_cp0_random ), + .io_statistic_cpu_soc_cp0_cause (statistic_cpu_soc_cp0_cause ), + .io_statistic_cpu_soc_int (statistic_cpu_soc_int ), + .io_statistic_cpu_soc_commit (statistic_cpu_soc_commit ), + .io_statistic_cpu_bpu_success (statistic_cpu_bpu_success ), + .io_statistic_cpu_bpu_branch (statistic_cpu_bpu_branch ), + .io_statistic_cache_icache_request (statistic_cache_icache_request ), + .io_statistic_cache_icache_hit (statistic_cache_icache_hit ), + .io_statistic_cache_dcache_request (statistic_cache_dcache_request ), + .io_statistic_cache_dcache_hit (statistic_cache_dcache_hit ) +); +endmodule diff --git a/chisel/playground/src/Core.scala b/chisel/playground/src/Core.scala new file mode 100644 index 0000000..e667c3b --- /dev/null +++ b/chisel/playground/src/Core.scala @@ -0,0 +1,194 @@ +package cpu + +import chisel3._ +import chisel3.util._ +import chisel3.internal.DontCareBinding + +import defines._ +import defines.Const._ +import pipeline.fetch._ +import pipeline.decoder._ +import pipeline.execute._ +import pipeline.memory._ +import pipeline.writeback._ +import ctrl._ +import mmu._ +import chisel3.util.experimental.decode.decoder +import cpu.pipeline.fetch.InstFifo + +class Core(implicit val config: CpuConfig) extends Module { + val io = IO(new Bundle { + val ext_int = Input(UInt(6.W)) + val inst = new Cache_ICache() + val data = new Cache_DCache() + val debug = new DEBUG() + val statistic = if (!config.build) Some(new CPUStatistic()) else None + }) + + val ctrl = Module(new Ctrl()).io + val fetchUnit = Module(new FetchUnit()).io + val bpu = Module(new BranchPredictorUnit()).io + val instFifo = Module(new InstFifo()).io + val decoderUnit = Module(new DecoderUnit()).io + val regfile = Module(new ARegFile()).io + val executeStage = 
Module(new ExecuteStage()).io + val executeUnit = Module(new ExecuteUnit()).io + val cp0 = Module(new Cp0()).io + val memoryStage = Module(new MemoryStage()).io + val memoryUnit = Module(new MemoryUnit()).io + val writeBackStage = Module(new WriteBackStage()).io + val writeBackUnit = Module(new WriteBackUnit()).io + val tlbL1I = Module(new TlbL1I()).io + val tlbL1D = Module(new TlbL1D()).io + + tlbL1I.addr := fetchUnit.iCache.pc + tlbL1I.fence := executeUnit.executeStage.inst0.inst_info.tlbfence + tlbL1I.cpu_stall := !ctrl.fetchUnit.allow_to_go + tlbL1I.icache_stall := io.inst.icache_stall + tlbL1I.cache <> io.inst.tlb + + tlbL1D.addr := memoryUnit.dataMemory.out.addr + tlbL1D.fence := memoryUnit.memoryStage.inst0.inst_info.tlbfence + tlbL1D.cpu_stall := !ctrl.memoryUnit.allow_to_go + tlbL1D.dcache_stall := io.data.dcache_stall + tlbL1D.mem_write := memoryUnit.dataMemory.out.wen.orR + tlbL1D.mem_en := memoryUnit.dataMemory.out.en + tlbL1D.cache <> io.data.tlb + + ctrl.instFifo.has2insts := !(instFifo.empty || instFifo.almost_empty) + ctrl.decoderUnit <> decoderUnit.ctrl + ctrl.executeUnit <> executeUnit.ctrl + ctrl.memoryUnit <> memoryUnit.ctrl + ctrl.writeBackUnit <> writeBackUnit.ctrl + ctrl.cacheCtrl.iCache_stall := io.inst.icache_stall + ctrl.cacheCtrl.dCache_stall := io.data.dcache_stall + + fetchUnit.memory <> memoryUnit.fetchUnit + fetchUnit.execute <> executeUnit.fetchUnit + fetchUnit.decoder <> decoderUnit.fetchUnit + fetchUnit.instFifo.full := instFifo.full + fetchUnit.iCache.inst_valid := io.inst.inst_valid + io.inst.addr(0) := fetchUnit.iCache.pc + io.inst.addr(1) := fetchUnit.iCache.pc_next + for (i <- 2 until config.instFetchNum) { + io.inst.addr(i) := fetchUnit.iCache.pc_next + ((i - 1) * 4).U + } + + bpu.decoder.ena := ctrl.decoderUnit.allow_to_go + bpu.decoder.op := decoderUnit.bpu.decoded_inst0.op + bpu.decoder.inst := decoderUnit.bpu.decoded_inst0.inst + bpu.decoder.rs1 := decoderUnit.bpu.decoded_inst0.reg1_raddr + bpu.decoder.rs2 := 
decoderUnit.bpu.decoded_inst0.reg2_raddr + bpu.decoder.pc := decoderUnit.bpu.pc + bpu.decoder.pc_plus4 := decoderUnit.bpu.pc + 4.U + bpu.decoder.pht_index := decoderUnit.bpu.pht_index + decoderUnit.bpu.update_pht_index := bpu.decoder.update_pht_index + bpu.execute <> executeUnit.bpu + if (config.branchPredictor == "pesudo") { + bpu.regfile.get <> regfile.bpu.get + } + decoderUnit.bpu.branch_inst := bpu.decoder.branch_inst + decoderUnit.bpu.pred_branch := bpu.decoder.pred_branch + decoderUnit.bpu.branch_target := bpu.decoder.branch_target + + instFifo.do_flush := ctrl.decoderUnit.do_flush + instFifo.flush_delay_slot := ctrl.instFifo.delay_slot_do_flush + instFifo.icache_stall := io.inst.icache_stall + instFifo.jump_branch_inst := decoderUnit.instFifo.jump_branch_inst + instFifo.delay_sel_flush := Mux( + ctrl.executeUnit.branch, + !(executeUnit.memoryStage.inst1.ex.bd || decoderUnit.executeStage.inst0.ex.bd), + Mux(ctrl.decoderUnit.branch, !decoderUnit.instFifo.allow_to_go(1), false.B), + ) + instFifo.decoder_delay_flush := ctrl.decoderUnit.branch + instFifo.execute_delay_flush := ctrl.executeUnit.branch + instFifo.ren <> decoderUnit.instFifo.allow_to_go + decoderUnit.instFifo.inst <> instFifo.read + + for (i <- 0 until config.instFetchNum) { + instFifo.write(i).pht_index := bpu.instBuffer.pht_index(i) + bpu.instBuffer.pc(i) := instFifo.write(i).pc + instFifo.wen(i) := io.inst.inst_valid(i) + instFifo.write(i).tlb.refill := tlbL1I.tlb1.refill + instFifo.write(i).tlb.invalid := tlbL1I.tlb1.invalid + instFifo.write(i).pc := io.inst.addr(0) + (i * 4).U + instFifo.write(i).inst := io.inst.inst(i) + } + + decoderUnit.instFifo.info.empty := instFifo.empty + decoderUnit.instFifo.info.almost_empty := instFifo.almost_empty + decoderUnit.instFifo.info.inst0_is_in_delayslot := instFifo.inst0_is_in_delayslot + decoderUnit.regfile <> regfile.read + for (i <- 0 until (config.fuNum)) { + decoderUnit.forward(i).exe := executeUnit.decoderUnit.forward(i).exe + 
decoderUnit.forward(i).mem_wreg := executeUnit.decoderUnit.forward(i).exe_mem_wreg + decoderUnit.forward(i).mem := memoryUnit.decoderUnit(i) + } + decoderUnit.cp0 <> cp0.decoderUnit + decoderUnit.executeStage <> executeStage.decoderUnit + + executeStage.ctrl.clear(0) := ctrl.memoryUnit.flush_req || + !decoderUnit.executeStage.inst0.ex.bd && ctrl.executeUnit.do_flush && ctrl.executeUnit.allow_to_go || + !ctrl.decoderUnit.allow_to_go && ctrl.executeUnit.allow_to_go + executeStage.ctrl.clear(1) := ctrl.memoryUnit.flush_req || + (ctrl.executeUnit.do_flush && decoderUnit.executeStage.inst1.allow_to_go) || + (ctrl.executeUnit.allow_to_go && !decoderUnit.executeStage.inst1.allow_to_go) + executeStage.ctrl.inst0_allow_to_go := ctrl.executeUnit.allow_to_go + + executeUnit.decoderUnit.inst0_bd := decoderUnit.executeStage.inst0.ex.bd + executeUnit.executeStage <> executeStage.executeUnit + executeUnit.cp0 <> cp0.executeUnit + executeUnit.memoryStage <> memoryStage.executeUnit + + cp0.ctrl.exe_stall := !ctrl.executeUnit.allow_to_go + cp0.ctrl.mem_stall := !ctrl.memoryUnit.allow_to_go + cp0.tlb(0).vpn2 := tlbL1I.tlb2.vpn2 + cp0.tlb(1).vpn2 := tlbL1D.tlb2.vpn2 + cp0.ext_int := io.ext_int + tlbL1I.tlb2.found := cp0.tlb(0).found + tlbL1D.tlb2.found := cp0.tlb(1).found + tlbL1I.tlb2.entry := cp0.tlb(0).info + tlbL1D.tlb2.entry := cp0.tlb(1).info + + memoryStage.ctrl.allow_to_go := ctrl.memoryUnit.allow_to_go + memoryStage.ctrl.clear := ctrl.memoryUnit.do_flush + + memoryUnit.memoryStage <> memoryStage.memoryUnit + memoryUnit.cp0 <> cp0.memoryUnit + memoryUnit.writeBackStage <> writeBackStage.memoryUnit + + memoryUnit.dataMemory.in.tlb <> tlbL1D.tlb1 + memoryUnit.dataMemory.in.rdata := io.data.rdata + io.data.en := memoryUnit.dataMemory.out.en + io.data.rlen := memoryUnit.dataMemory.out.rlen + io.data.wen := memoryUnit.dataMemory.out.wen + io.data.wdata := memoryUnit.dataMemory.out.wdata + io.data.addr := memoryUnit.dataMemory.out.addr + + writeBackStage.memoryUnit <> 
memoryUnit.writeBackStage + writeBackStage.ctrl.allow_to_go := ctrl.writeBackUnit.allow_to_go + writeBackStage.ctrl.clear := ctrl.writeBackUnit.do_flush + + writeBackUnit.writeBackStage <> writeBackStage.writeBackUnit + writeBackUnit.ctrl <> ctrl.writeBackUnit + regfile.write <> writeBackUnit.regfile + + io.debug <> writeBackUnit.debug + + io.inst.fence := executeUnit.executeStage.inst0.inst_info.ifence + io.inst.fence_addr := executeUnit.executeStage.inst0.inst_info.mem_addr + io.data.fence := memoryUnit.memoryStage.inst0.inst_info.dfence + io.data.fence_addr := memoryUnit.memoryStage.inst0.inst_info.mem_addr + io.data.execute_addr := executeUnit.memoryStage.inst0.mem.addr + io.inst.req := !instFifo.full + io.inst.cpu_stall := !ctrl.fetchUnit.allow_to_go + io.data.cpu_stall := !ctrl.memoryUnit.allow_to_go + + // ===----------------------------------------------------------------=== + // statistic + // ===----------------------------------------------------------------=== + if (!config.build) { + io.statistic.get.soc <> writeBackUnit.statistic.get + io.statistic.get.bpu <> executeUnit.statistic.get + } +} diff --git a/chisel/playground/src/CpuConfig.scala b/chisel/playground/src/CpuConfig.scala new file mode 100644 index 0000000..ad6a4ae --- /dev/null +++ b/chisel/playground/src/CpuConfig.scala @@ -0,0 +1,48 @@ +package cpu + +import chisel3.util._ + +case class CpuConfig( + val build: Boolean = false, // 是否为build模式 + val hasCommitBuffer: Boolean = false, // 是否有提交缓存 + val decoderNum: Int = 2, // 同时访问寄存器的指令数 + val commitNum: Int = 2, // 同时提交的指令数 + val fuNum: Int = 2, // 功能单元数 + val instFetchNum: Int = 2, // iCache取到的指令数量 + val instFifoDepth: Int = 8, // 指令缓存深度 + val writeBufferDepth: Int = 16, // 写缓存深度 + val mulClockNum: Int = 2, // 乘法器的时钟周期数 + val divClockNum: Int = 8, // 除法器的时钟周期数 + val branchPredictor: String = "adaptive",// adaptive, pesudo, global +) + +case class BranchPredictorConfig( + val bhtDepth: Int = 5, + val phtDepth: Int = 6, +) + +case class 
CacheConfig( + nway: Int = 2, // 路数 + nbank: Int = 8, // bank数 + nset: Int, + bankWidth: Int, // bytes per bank +) { + val config = CpuConfig() + val indexWidth = log2Ceil(nset) // 6 + val bankIndexWidth = log2Ceil(nbank) // 3 + val bankOffsetWidth = log2Ceil(bankWidth) // 3 + val offsetWidth = bankIndexWidth + bankOffsetWidth // 6 + val tagWidth = 32 - indexWidth - offsetWidth // 20 + val tagvWidth = tagWidth + 1 // 21 + val bankWidthBits = bankWidth * 8 // 64 + val burstSize = 16 + val ninst = config.instFetchNum // TODO:改成可随意修改的参数 + require(isPow2(nset)) + require(isPow2(nway)) + require(isPow2(nbank)) + require(isPow2(bankWidth)) + require( + tagWidth + indexWidth + bankIndexWidth + bankOffsetWidth == 32, + "basic request calculation", + ) +} diff --git a/chisel/playground/src/DecoupledGCD.scala b/chisel/playground/src/DecoupledGCD.scala deleted file mode 100644 index 8fd0456..0000000 --- a/chisel/playground/src/DecoupledGCD.scala +++ /dev/null @@ -1,69 +0,0 @@ -import chisel3._ -import chisel3.util.Decoupled - -class GcdInputBundle(val w: Int) extends Bundle { - val value1 = UInt(w.W) - val value2 = UInt(w.W) -} - -class GcdOutputBundle(val w: Int) extends Bundle { - val value1 = UInt(w.W) - val value2 = UInt(w.W) - val gcd = UInt(w.W) -} - -/** - * Compute Gcd using subtraction method. - * Subtracts the smaller from the larger until register y is zero. - * value input register x is then the Gcd. - * Unless first input is zero then the Gcd is y. 
- * Can handle stalls on the producer or consumer side - */ -class DecoupledGcd(width: Int) extends Module { - val input = IO(Flipped(Decoupled(new GcdInputBundle(width)))) - val output = IO(Decoupled(new GcdOutputBundle(width))) - - val xInitial = Reg(UInt()) - val yInitial = Reg(UInt()) - val x = Reg(UInt()) - val y = Reg(UInt()) - val busy = RegInit(false.B) - val resultValid = RegInit(false.B) - - input.ready := !busy - output.valid := resultValid - output.bits := DontCare - - when(busy) { - when(x > y) { - x := x - y - }.otherwise { - y := y - x - } - when(x === 0.U || y === 0.U) { - when(x === 0.U) { - output.bits.gcd := y - }.otherwise { - output.bits.gcd := x - } - - output.bits.value1 := xInitial - output.bits.value2 := yInitial - resultValid := true.B - - when(output.ready && resultValid) { - busy := false.B - resultValid := false.B - } - } - }.otherwise { - when(input.valid) { - val bundle = input.deq() - x := bundle.value1 - y := bundle.value2 - xInitial := bundle.value1 - yInitial := bundle.value2 - busy := true.B - } - } -} diff --git a/chisel/playground/src/Elaborate.scala b/chisel/playground/src/Elaborate.scala index 6a95895..8a52b9c 100644 --- a/chisel/playground/src/Elaborate.scala +++ b/chisel/playground/src/Elaborate.scala @@ -1,7 +1,9 @@ +import cpu._ import circt.stage._ object Elaborate extends App { - def top = new GCD() + implicit val config = new CpuConfig() + def top = new PuaMips() val generator = Seq(chisel3.stage.ChiselGeneratorAnnotation(() => top)) (new ChiselStage).execute(args, generator :+ CIRCTTargetAnnotation(CIRCTTarget.Verilog)) } diff --git a/chisel/playground/src/GCD.scala b/chisel/playground/src/GCD.scala deleted file mode 100644 index 42c4ce0..0000000 --- a/chisel/playground/src/GCD.scala +++ /dev/null @@ -1,29 +0,0 @@ -import chisel3._ - -/** - * Compute GCD using subtraction method. - * Subtracts the smaller from the larger until register y is zero. 
- * value in register x is then the GCD - */ -class GCD extends Module { - val io = IO(new Bundle { - val value1 = Input(UInt(16.W)) - val value2 = Input(UInt(16.W)) - val loadingValues = Input(Bool()) - val outputGCD = Output(UInt(16.W)) - val outputValid = Output(Bool()) - }) - - val x = Reg(UInt()) - val y = Reg(UInt()) - - when(x > y) { x := x - y }.otherwise { y := y - x } - - when(io.loadingValues) { - x := io.value1 - y := io.value2 - } - - io.outputGCD := x - io.outputValid := y === 0.U -} diff --git a/chisel/playground/src/PuaMips.scala b/chisel/playground/src/PuaMips.scala new file mode 100644 index 0000000..58f3ad5 --- /dev/null +++ b/chisel/playground/src/PuaMips.scala @@ -0,0 +1,32 @@ +import chisel3._ +import chisel3.util._ +import cache._ +import cpu._ +import cpu.defines._ + +class PuaMips extends Module { + implicit val config = new CpuConfig() + val io = IO(new Bundle { + val ext_int = Input(UInt(6.W)) + val axi = new AXI() + val debug = new DEBUG() + val statistic = if (!config.build) Some(new GlobalStatistic()) else None + }) + val core = Module(new Core()) + val cache = Module(new Cache()) + + core.io.inst <> cache.io.inst + core.io.data <> cache.io.data + + io.ext_int <> core.io.ext_int + io.debug <> core.io.debug + io.axi <> cache.io.axi + + // ===----------------------------------------------------------------=== + // statistic + // ===----------------------------------------------------------------=== + if (!config.build) { + io.statistic.get.cpu <> core.io.statistic.get + io.statistic.get.cache <> cache.io.statistic.get + } +} diff --git a/chisel/playground/src/axi/FIFO.scala b/chisel/playground/src/axi/FIFO.scala new file mode 100644 index 0000000..e7921df --- /dev/null +++ b/chisel/playground/src/axi/FIFO.scala @@ -0,0 +1,119 @@ +package cpu.axi + +import chisel3._ +import chisel3.util._ + +/** A simple FIFO buffer implemented using Chisel's built-in Queue module. 
+ * + * @param dataWidth + * The width of the data to be stored in the buffer. + * @param buffDepth + * The depth of the buffer (i.e. the number of elements it can hold). + * @param addrWidth + * The width of the address used to access the buffer. + */ +class FifoBuffer( + val dataWidth: Int = 32, + val buffDepth: Int = 4, + val addrWidth: Int = 2, +) extends Module { + val io = IO(new Bundle { + val wen = Input(Bool()) // Write enable signal. + val ren = Input(Bool()) // Read enable signal. + val input = Input(UInt(dataWidth.W)) // Data to be written to the buffer. + val output = Output(UInt(dataWidth.W)) // Data read from the buffer. + val empty = Output(Bool()) // Output signal indicating whether the buffer is empty. + val full = Output(Bool()) // Output signal indicating whether the buffer is full. + }) + + // Instantiate a Queue module with the given data width and buffer depth. + val queue = Module(new Queue(UInt(dataWidth.W), buffDepth)) + + // Connect the input and output signals to the Queue module. + queue.io.enq.valid := io.wen + queue.io.enq.bits := io.input + io.full := queue.io.enq.ready === false.B + queue.io.deq.ready := io.ren + io.output := queue.io.deq.bits + io.empty := queue.io.count === 0.U +} + +/** A simple counter that keeps track of the number of elements in a FIFO buffer. + * + * @param buffDepth + * The depth of the buffer (i.e. the number of elements it can hold). + * @param addrWidth + * The width of the address used to access the buffer. 
+ */ +class FifoCount( + val buffDepth: Int = 4, + val addrWidth: Int = 2, +) extends Module { + val io = IO(new Bundle { + val wen = Input(Bool()) + val ren = Input(Bool()) + val empty = Output(Bool()) + val full = Output(Bool()) + }) + + val count = RegInit(0.U(addrWidth.W)) + + io.empty := count === 0.U + io.full := count === buffDepth.U + + when(io.ren && !io.empty) { + count := count - 1.U + }.elsewhen(io.wen && !io.full) { + count := count + 1.U + } +} + +/** A FIFO buffer with a valid signal that checks if the output data is related to a specific value. + * + * @param dataWidth + * The width of the data to be stored in the buffer. + * @param buffDepth + * The depth of the buffer (i.e. the number of elements it can hold). + * @param addrWidth + * The width of the address used to access the buffer. + * @param relatedDataWidth + * The width of the related data used to check if the output data is related to a specific value. + */ +class FifoBufferValid( + val dataWidth: Int = 33, + val buffDepth: Int = 6, + val addrWidth: Int = 3, + val relatedDataWidth: Int = 32, +) extends Module { + val io = IO(new Bundle { + val wen = Input(Bool()) // Write enable signal. + val ren = Input(Bool()) // Read enable signal. + val empty = Output(Bool()) // Output signal indicating whether the buffer is empty. + val full = Output(Bool()) // Output signal indicating whether the buffer is full. + val related_1 = Output( + Bool(), + ) // Output signal indicating whether the output data is related to a specific value. + val input = Input(UInt(dataWidth.W)) // Data to be written to the buffer. + val output = Output(UInt(dataWidth.W)) // Data read from the buffer. + val related_data_1 = Input( + UInt(relatedDataWidth.W), + ) // Related data used to check if the output data is related to a specific value. + }) + + // Instantiate a Queue module with the given data width and buffer depth. 
/** Top-level cache wrapper: one instruction cache, one data cache, and the shared AXI interface
  * that both of them talk to.
  *
  * The statistic port exists only when `config.build` is false.
  */
class Cache(implicit config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val inst      = Flipped(new Cache_ICache())
    val data      = Flipped(new Cache_DCache())
    val axi       = new AXI()
    val statistic = if (!config.build) Some(new CacheStatistic()) else None
  })

  // Geometry of the two caches; both are handed to their module explicitly below.
  implicit val iCacheConfig = CacheConfig(nset = 64, nbank = 4, bankWidth = 16)
  implicit val dCacheConfig = CacheConfig(nset = 128, bankWidth = 4)

  val icache        = Module(new ICache(iCacheConfig))
  val dcache        = Module(new DCache(dCacheConfig))
  val axi_interface = Module(new CacheAXIInterface())

  // Cache <-> AXI interface.
  icache.io.axi <> axi_interface.io.icache
  dcache.io.axi <> axi_interface.io.dcache

  // Core-facing and bus-facing ports.
  io.inst <> icache.io.cpu
  io.data <> dcache.io.cpu
  io.axi  <> axi_interface.io.axi

  // ===----------------------------------------------------------------===
  // statistic
  // ===----------------------------------------------------------------===
  // All three optional ports are created under the same `!config.build` condition.
  for {
    stat  <- io.statistic
    iStat <- icache.io.statistic
    dStat <- dcache.io.statistic
  } {
    stat.icache <> iStat
    stat.dcache <> dStat
  }
}
/** Merges the ICache and DCache AXI ports onto a single AXI master port.
  *
  * Read address channel: `ar_sel` selects the DCache (true) or the ICache (false). The DCache is
  * chosen only when the ICache has no valid request. Once a request is presented but not yet
  * accepted, the choice is held in `ar_sel_lock_val` until `ar.ready` is seen.
  *
  * Read data channel: bit 0 of the returned id (`r_sel`) routes the beat; the id sent on `ar` is
  * `ar_sel`, so 1 means DCache and 0 means ICache.
  *
  * Write channels (aw / w / b) are connected to the DCache only.
  */
class CacheAXIInterface extends Module {
  val io = IO(new Bundle {
    val icache = Flipped(new ICache_AXIInterface())
    val dcache = Flipped(new DCache_AXIInterface())
    val axi    = new AXI()
  })

  val ar_sel          = Wire(Bool())
  val ar_sel_lock     = RegInit(false.B)
  val ar_sel_lock_val = RegInit(false.B)

  // Hold the arbitration result while a presented request is waiting for ar.ready.
  when(io.axi.ar.valid && io.axi.ar.ready) {
    ar_sel_lock := false.B
  }.elsewhen(io.axi.ar.valid) {
    ar_sel_lock     := true.B
    ar_sel_lock_val := ar_sel
  }

  val dcache_wins = io.dcache.ar.valid && !io.icache.ar.valid
  ar_sel := Mux(ar_sel_lock, ar_sel_lock_val, dcache_wins)

  val r_sel = io.axi.r.bits.id(0)

  // ===----------------------------------------------------------------===
  // icache
  // ===----------------------------------------------------------------===
  io.icache.ar.ready    := !ar_sel && io.axi.ar.ready
  io.icache.r.bits.data := Mux(r_sel, 0.U, io.axi.r.bits.data)
  io.icache.r.bits.last := Mux(r_sel, 0.U, io.axi.r.bits.last)
  io.icache.r.valid     := Mux(r_sel, 0.U, io.axi.r.valid)

  // ===----------------------------------------------------------------===
  // dcache
  // ===----------------------------------------------------------------===
  io.dcache.ar.ready    := ar_sel && io.axi.ar.ready
  io.dcache.r.bits.data := Mux(r_sel, io.axi.r.bits.data, 0.U)
  io.dcache.r.bits.last := Mux(r_sel, io.axi.r.bits.last, 0.U)
  io.dcache.r.valid     := Mux(r_sel, io.axi.r.valid, 0.U)

  io.dcache.aw.ready := io.axi.aw.ready
  io.dcache.w.ready  := io.axi.w.ready
  io.dcache.b.valid  := io.axi.b.valid

  // ===----------------------------------------------------------------===
  // axi: read address / read data
  // ===----------------------------------------------------------------===
  io.axi.ar.valid      := Mux(ar_sel, io.dcache.ar.valid, io.icache.ar.valid)
  io.axi.ar.bits.id    := ar_sel
  io.axi.ar.bits.addr  := Mux(ar_sel, io.dcache.ar.bits.addr, io.icache.ar.bits.addr)
  io.axi.ar.bits.len   := Mux(ar_sel, io.dcache.ar.bits.len, io.icache.ar.bits.len)
  io.axi.ar.bits.size  := Mux(ar_sel, io.dcache.ar.bits.size, io.icache.ar.bits.size)
  io.axi.ar.bits.burst := 1.U
  io.axi.ar.bits.lock  := 0.U
  io.axi.ar.bits.cache := 0.U
  io.axi.ar.bits.prot  := 0.U

  io.axi.r.ready := Mux(r_sel, io.dcache.r.ready, io.icache.r.ready)

  // ===----------------------------------------------------------------===
  // axi: write address / write data / write response (dcache only)
  // ===----------------------------------------------------------------===
  io.axi.aw.valid      := io.dcache.aw.valid
  io.axi.aw.bits.id    := 0.U
  io.axi.aw.bits.addr  := io.dcache.aw.bits.addr
  io.axi.aw.bits.len   := io.dcache.aw.bits.len
  io.axi.aw.bits.size  := io.dcache.aw.bits.size
  io.axi.aw.bits.burst := 1.U
  io.axi.aw.bits.lock  := 0.U
  io.axi.aw.bits.cache := 0.U
  io.axi.aw.bits.prot  := 0.U

  io.axi.w.valid     := io.dcache.w.valid
  io.axi.w.bits.id   := 0.U
  io.axi.w.bits.data := io.dcache.w.bits.data
  io.axi.w.bits.strb := io.dcache.w.bits.strb
  io.axi.w.bits.last := io.dcache.w.bits.last

  io.axi.b.ready := io.dcache.b.ready
}
/** One entry of the uncached-store write buffer: the word, its address, byte strobes and the
  * 2-bit size that is later zero-extended into `aw.size`.
  */
class WriteBufferUnit extends Bundle {
  val data = UInt(DATA_WID.W)
  val addr = UInt(DATA_ADDR_WID.W)
  val strb = UInt(4.W)
  val size = UInt(2.W)
}

/** Data cache with a small write buffer for uncached stores.
  *
  * State machine:
  *   - `s_idle`: serves hits, starts TLB fills, queues uncached stores, launches uncached loads,
  *     starts a line replacement on a miss, and handles `fence` requests.
  *   - `s_uncached`: waits for the single-beat read response of an uncached load.
  *   - `s_writeback`: writes a dirty line of the fenced set back over AXI.
  *   - `s_replace`: optionally writes back the victim line, then refills it with a burst read.
  *   - `s_save`: holds `saved_rdata` until neither the cache nor the cpu is stalled.
  *
  * NOTE(review): several widths are literals rather than derived from `cacheConfig` (6-bit set
  * index taken from address bits 11..6, 10-bit data address, 16-entry `read_buffer`, burst length
  * 15), and way indices 0/1 are used directly, so the logic presumes a two-way cache with 64-byte
  * lines — confirm before changing the configuration.
  *
  * @param cacheConfig
  *   geometry of the cache (ways, sets, banks, tag width, burst size)
  * @param config
  *   implicit cpu configuration; the statistic port exists only when `config.build` is false
  */
class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Module {
  val nway:          Int = cacheConfig.nway
  val nset:          Int = cacheConfig.nset
  val nbank:         Int = cacheConfig.nbank
  val bankWidthBits: Int = cacheConfig.bankWidthBits
  val tagWidth:      Int = cacheConfig.tagWidth
  val burstSize:     Int = cacheConfig.burstSize

  val io = IO(new Bundle {
    val cpu       = Flipped(new Cache_DCache())
    val axi       = new DCache_AXIInterface()
    val statistic = if (!config.build) Some(new DCacheStatistic()) else None
  })

  val tlb_fill = RegInit(false.B)
  // * fsm * //
  val s_idle :: s_uncached :: s_writeback :: s_replace :: s_save :: Nil = Enum(5)
  val state = RegInit(s_idle)

  io.cpu.tlb.fill           := tlb_fill
  io.cpu.tlb.dcache_is_idle := state === s_idle
  io.cpu.tlb.dcache_is_save := state === s_save

  // * valid dirty * //
  val valid = RegInit(VecInit(Seq.fill(nset)(VecInit(Seq.fill(nway)(false.B)))))
  val dirty = RegInit(VecInit(Seq.fill(nset)(VecInit(Seq.fill(nway)(false.B)))))
  val lru   = RegInit(VecInit(Seq.fill(nset)(0.U(1.W))))

  // When true the RAMs are read with `execute_addr`, otherwise with `addr`.
  val should_next_addr = (state === s_idle && !tlb_fill) || (state === s_save)

  // Buffer for uncached stores; defaults below are overridden where the fifo is used.
  val write_fifo = Module(new Queue(new WriteBufferUnit(), 4))

  write_fifo.io.enq.valid := false.B
  write_fifo.io.enq.bits  := 0.U.asTypeOf(new WriteBufferUnit())
  write_fifo.io.deq.ready := false.B

  val axi_cnt        = Counter(burstSize)
  val read_ready_cnt = RegInit(0.U(4.W))
  val read_ready_set = RegInit(0.U(6.W))

  // * victim cache * //
  val victim = RegInit(0.U.asTypeOf(new Bundle {
    val valid     = Bool()
    val set       = UInt(6.W)
    val waddr     = UInt(10.W)
    val wstrb     = Vec(nway, UInt(4.W))
    val working   = Bool()
    val writeback = Bool()
  }))
  val victim_cnt  = Counter(burstSize)
  val victim_addr = Cat(victim.set, victim_cnt.value)

  val fset = io.cpu.fence_addr(11, 6)
  val fence = RegInit(0.U.asTypeOf(new Bundle {
    val working = Bool()
  }))

  val read_buffer  = RegInit(VecInit(Seq.fill(16)(0.U(DATA_WID.W))))
  val ar_handshake = RegInit(false.B)
  val aw_handshake = RegInit(false.B)

  // While a victim line is being processed the RAM addresses come from the victim bookkeeping.
  val data_raddr = Mux(victim.valid, victim_addr, Mux(should_next_addr, io.cpu.execute_addr(11, 2), io.cpu.addr(11, 2)))
  val data_wstrb = Wire(Vec(nway, UInt(4.W)))
  val data_waddr = Mux(victim.valid, victim.waddr, io.cpu.addr(11, 2))
  val data_wdata = Mux(state === s_replace, io.axi.r.bits.data, io.cpu.wdata)

  val tag_raddr = Mux(victim.valid, victim.set, Mux(should_next_addr, io.cpu.execute_addr(11, 6), io.cpu.addr(11, 6)))
  val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B)))
  val tag_wdata = RegInit(0.U(tagWidth.W))

  val data = Wire(Vec(nway, UInt(DATA_WID.W)))
  // Registered copy of the tag RAM outputs.
  val tag = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W))))

  val tag_compare_valid = Wire(Vec(nway, Bool()))
  val cache_hit         = tag_compare_valid.contains(true.B)

  val mmio_read_stall  = io.cpu.tlb.uncached && !io.cpu.wen.orR
  val mmio_write_stall = io.cpu.tlb.uncached && io.cpu.wen.orR && !write_fifo.io.enq.ready
  val cached_stall     = !io.cpu.tlb.uncached && !cache_hit

  // Selected way: 1 when way 1 matches, otherwise 0.
  val sel = tag_compare_valid(1)

  // * physical set * //
  val pset = io.cpu.addr(11, 6)

  io.cpu.dcache_stall := Mux(
    state === s_idle && !tlb_fill,
    Mux(io.cpu.en, (cached_stall || mmio_read_stall || mmio_write_stall || !io.cpu.tlb.translation_ok), io.cpu.fence),
    state =/= s_save,
  )

  val saved_rdata = RegInit(0.U(DATA_WID.W))

  // forward last stored data in data bram
  val last_waddr         = RegNext(data_waddr)
  val last_wstrb         = RegInit(VecInit(Seq.fill(nway)(0.U(DATA_WID.W))))
  val last_wdata         = RegNext(data_wdata)
  val cache_data_forward = Wire(Vec(nway, UInt(DATA_WID.W)))

  io.cpu.rdata := Mux(state === s_save, saved_rdata, cache_data_forward(sel))

  // bank tagv ram
  for { i <- 0 until nway } {
    val bank_ram = Module(new SimpleDualPortRam(nset * nbank, bankWidthBits, byteAddressable = true))
    bank_ram.io.ren   := true.B
    bank_ram.io.raddr := data_raddr
    data(i)           := bank_ram.io.rdata

    bank_ram.io.wen   := data_wstrb(i).orR
    bank_ram.io.waddr := data_waddr
    bank_ram.io.wdata := data_wdata
    bank_ram.io.wstrb := data_wstrb(i)

    val tag_ram = Module(new LUTRam(nset, tagWidth))
    tag_ram.io.raddr := tag_raddr
    tag(i)           := tag_ram.io.rdata

    tag_ram.io.wen   := tag_wstrb(i)
    tag_ram.io.waddr := victim.set
    tag_ram.io.wdata := tag_wdata

    tag_compare_valid(i) := tag(i) === io.cpu.tlb.tag && valid(pset)(i) && io.cpu.tlb.translation_ok
    // If the previous cycle wrote the word being read, merge the written bytes into the RAM output.
    cache_data_forward(i) := Mux(
      last_waddr === io.cpu.addr(11, 2),
      ((last_wstrb(i) & last_wdata) | (data(i) & (~last_wstrb(i)))),
      data(i),
    )

    // Store hit in idle state writes with the cpu strobes; otherwise the refill strobes apply.
    data_wstrb(i) := Mux(
      tag_compare_valid(i) && io.cpu.en && io.cpu.wen.orR && !io.cpu.tlb.uncached && state === s_idle && !tlb_fill,
      io.cpu.wen,
      victim.wstrb(i),
    )

    // Expand the 4 byte strobes into a 32-bit mask for the forwarding path.
    last_wstrb(i) := Cat(
      Fill(8, data_wstrb(i)(3)),
      Fill(8, data_wstrb(i)(2)),
      Fill(8, data_wstrb(i)(1)),
      Fill(8, data_wstrb(i)(0)),
    )
  }
  val write_buffer_axi_busy = RegInit(false.B)

  // AXI channel registers.
  val ar      = RegInit(0.U.asTypeOf(new AR()))
  val arvalid = RegInit(false.B)
  io.axi.ar.bits <> ar
  io.axi.ar.valid := arvalid
  val rready = RegInit(false.B)
  io.axi.r.ready := rready
  val aw      = RegInit(0.U.asTypeOf(new AW()))
  val awvalid = RegInit(false.B)
  io.axi.aw.bits <> aw
  io.axi.aw.valid := awvalid
  val w      = RegInit(0.U.asTypeOf(new W()))
  val wvalid = RegInit(false.B)
  io.axi.w.bits <> w
  io.axi.w.valid := wvalid

  io.axi.b.ready := true.B

  val current_mmio_write_saved = RegInit(false.B)

  // write buffer
  when(write_buffer_axi_busy) { // To implement SC memory ordering, when store buffer busy, axi is unusable.
    when(io.axi.aw.fire) {
      awvalid := false.B
    }
    when(io.axi.w.fire) {
      wvalid := false.B
      w.last := false.B
    }
    when(io.axi.b.fire) {
      write_buffer_axi_busy := false.B
    }
  }.elsewhen(write_fifo.io.deq.valid) {
    // Pop one buffered store and issue it as a single-beat AXI write.
    write_fifo.io.deq.ready := write_fifo.io.deq.valid
    when(write_fifo.io.deq.fire) {
      aw.addr := write_fifo.io.deq.bits.addr
      aw.size := Cat(0.U(1.W), write_fifo.io.deq.bits.size)
      w.data  := write_fifo.io.deq.bits.data
      w.strb  := write_fifo.io.deq.bits.strb
    }
    aw.len                := 0.U
    awvalid               := true.B
    w.last                := true.B
    wvalid                := true.B
    write_buffer_axi_busy := true.B
  }

  switch(state) {
    is(s_idle) {
      when(tlb_fill) {
        tlb_fill := false.B
        when(!io.cpu.tlb.hit) {
          state := s_save
        }
      }.elsewhen(io.cpu.en) {
        when(!io.cpu.tlb.translation_ok) {
          when(io.cpu.tlb.tlb1_ok) {
            state := s_save
          }.otherwise {
            tlb_fill := true.B
          }
        }.elsewhen(io.cpu.tlb.uncached) {
          when(io.cpu.wen.orR) {
            // Uncached store: enqueue once per request, tracked by current_mmio_write_saved.
            when(write_fifo.io.enq.ready && !current_mmio_write_saved) {
              write_fifo.io.enq.valid := true.B
              write_fifo.io.enq.bits.addr := Mux(
                io.cpu.rlen === 2.U,
                Cat(io.cpu.tlb.pa(31, 2), 0.U(2.W)),
                io.cpu.tlb.pa,
              )
              write_fifo.io.enq.bits.size := io.cpu.rlen
              write_fifo.io.enq.bits.strb := io.cpu.wen
              write_fifo.io.enq.bits.data := io.cpu.wdata

              current_mmio_write_saved := true.B
            }
            when(!io.cpu.dcache_stall && !io.cpu.cpu_stall) {
              current_mmio_write_saved := false.B
            }
          }.elsewhen(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) {
            // Uncached load: single-beat read, issued only once the write buffer has drained.
            ar.addr := Mux(io.cpu.rlen === 2.U, Cat(io.cpu.tlb.pa(31, 2), 0.U(2.W)), io.cpu.tlb.pa)
            ar.len  := 0.U
            ar.size := Cat(0.U(1.W), io.cpu.rlen)
            arvalid := true.B
            state   := s_uncached
            rready  := true.B
          } // when store buffer busy, read will stop at s_idle but stall pipeline.
        }.otherwise {
          when(!cache_hit) {
            // Miss: set up the victim bookkeeping and go replace the LRU way of this set.
            state := s_replace
            axi_cnt.reset()
            victim.set := pset
            victim_cnt.reset()
            read_ready_set   := pset
            read_ready_cnt   := 0.U
            victim.waddr     := Cat(pset, 0.U(4.W))
            victim.valid     := true.B
            victim.writeback := dirty(pset)(lru(pset))
          }.otherwise {
            when(!io.cpu.dcache_stall) {
              // update lru and mark dirty
              lru(pset) := ~sel
              when(io.cpu.wen.orR) {
                dirty(pset)(sel) := true.B
              }
              when(io.cpu.cpu_stall) {
                saved_rdata := cache_data_forward(sel)
                state       := s_save
              }
            }
          }
        }
      }.elsewhen(io.cpu.fence) {
        when(dirty(fset).contains(true.B)) {
          when(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) {
            state := s_writeback
            axi_cnt.reset()
            victim.set := fset
            victim_cnt.reset()
            read_ready_set := fset
            read_ready_cnt := 0.U
            victim.valid   := true.B
          }
        }.otherwise {
          // Nothing dirty in the fenced set: invalidate both ways.
          when(valid(fset).contains(true.B)) {
            valid(fset)(0) := false.B
            valid(fset)(1) := false.B
          }
          state := s_save
        }
      }
    }
    is(s_uncached) {
      when(arvalid && io.axi.ar.ready) {
        arvalid := false.B
      }
      when(io.axi.r.valid) {
        saved_rdata := io.axi.r.bits.data
        state       := s_save
      }
    }
    is(s_writeback) {
      when(fence.working) {
        when(victim_cnt.value =/= (burstSize - 1).U) {
          victim_cnt.inc()
        }
        read_ready_set := victim.set
        read_ready_cnt := victim_cnt.value
        // Way index is the dirty bit of way 1: way 1 if it is dirty, otherwise way 0.
        read_buffer(read_ready_cnt) := data(dirty(fset)(1))
        when(!aw_handshake) {
          aw.addr      := Cat(tag(dirty(fset)(1)), fset, 0.U(6.W))
          aw.len       := 15.U
          aw.size      := 2.U(3.W)
          awvalid      := true.B
          w.data       := data(dirty(fset)(1))
          w.strb       := 15.U
          w.last       := false.B
          wvalid       := true.B
          aw_handshake := true.B
        }
        when(io.axi.aw.fire) {
          awvalid := false.B
        }
        when(io.axi.w.fire) {
          when(w.last) {
            wvalid := false.B
          }.otherwise {
            // Next beat comes straight from the RAM output if it has not reached read_buffer yet.
            w.data := Mux(
              ((axi_cnt.value + 1.U) === read_ready_cnt),
              data(dirty(fset)(1)),
              read_buffer(axi_cnt.value + 1.U),
            )
            axi_cnt.inc()
            when(axi_cnt.value + 1.U === (burstSize - 1).U) {
              w.last := true.B
            }
          }
        }
        when(io.axi.b.valid) {
          dirty(fset)(dirty(fset)(1)) := false.B
          fence.working               := false.B
          victim.valid                := false.B
          state                       := s_idle
        }
      }.otherwise {
        // First cycle in this state: arm the handshake flag and start reading the line.
        aw_handshake  := false.B
        fence.working := true.B
        victim_cnt.inc()
      }
    }
    is(s_replace) {
      // Replacement only proceeds while the uncached write buffer is empty and idle.
      when(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) {
        when(victim.working) {
          when(victim.writeback) {
            when(victim_cnt.value =/= (burstSize - 1).U) {
              victim_cnt.inc()
            }
            read_ready_set              := victim.set
            read_ready_cnt              := victim_cnt.value
            read_buffer(read_ready_cnt) := data(lru(pset))
            when(!aw_handshake) {
              aw.addr      := Cat(tag(lru(pset)), pset, 0.U(6.W))
              aw.len       := 15.U
              aw.size      := 2.U(3.W)
              awvalid      := true.B
              aw_handshake := true.B
              w.data       := data(lru(pset))
              w.strb       := 15.U
              w.last       := false.B
              wvalid       := true.B
            }
            when(io.axi.aw.fire) {
              awvalid := false.B
            }
            when(io.axi.w.fire) {
              when(w.last) {
                wvalid := false.B
              }.otherwise {
                w.data := Mux(
                  ((axi_cnt.value + 1.U) === read_ready_cnt),
                  data(lru(pset)),
                  read_buffer(axi_cnt.value + 1.U),
                )
                axi_cnt.inc()
                when(axi_cnt.value + 1.U === (burstSize - 1).U) {
                  w.last := true.B
                }
              }
            }
            when(io.axi.b.valid) {
              dirty(pset)(lru(pset)) := false.B
              victim.writeback       := false.B
            }
          }
          when(!ar_handshake) {
            // Issue the refill burst and enable data/tag writes for the LRU way.
            ar.addr                 := Cat(io.cpu.tlb.pa(31, 6), 0.U(6.W))
            ar.len                  := 15.U
            ar.size                 := 2.U(3.W)
            arvalid                 := true.B
            rready                  := true.B
            ar_handshake            := true.B
            victim.wstrb(lru(pset)) := 15.U
            tag_wstrb(lru(pset))    := true.B
            tag_wdata               := io.cpu.tlb.pa(31, 12)
          }
          when(io.axi.ar.fire) {
            tag_wstrb(lru(pset)) := false.B
            arvalid              := false.B
          }
          when(io.axi.r.fire) {
            when(io.axi.r.bits.last) {
              rready                  := false.B
              victim.wstrb(lru(pset)) := 0.U
            }.otherwise {
              victim.waddr := victim.waddr + 1.U
            }
          }
          // Done once the write-back (if any) is acknowledged and the refill has finished.
          when(
            (!victim.writeback || io.axi.b.valid) && ((ar_handshake && io.axi.r.valid && io.axi.r.bits.last) || (ar_handshake && !rready)),
          ) {
            victim.valid           := false.B
            valid(pset)(lru(pset)) := true.B
          }
          when(!victim.valid) {
            victim.working := false.B
            state          := s_idle
          }
        }.otherwise {
          ar_handshake   := false.B
          aw_handshake   := false.B
          victim.working := true.B
          victim_cnt.inc()
        }
      }
    }
    is(s_save) {
      when(!io.cpu.dcache_stall && !io.cpu.cpu_stall) {
        state := s_idle
      }
    }
  }

  // ===----------------------------------------------------------------===
  // statistic
  // ===----------------------------------------------------------------===
  val req_cnt = RegInit(0.U(32.W))
  when(io.cpu.en) {
    req_cnt := req_cnt + 1.U
  }
  // Count hits only in cycles that also count as a request, so hit/request stays meaningful.
  val hit_cnt = RegInit(0.U(32.W))
  when(io.cpu.en && cache_hit) {
    hit_cnt := hit_cnt + 1.U
  }
  if (!config.build) {
    io.statistic.get.request := req_cnt
    io.statistic.get.hit     := hit_cnt
  }
}
/** Instruction cache.
  *
  * State machine:
  *   - `s_idle`: serves hits, requests a TLB fill, launches an uncached single-beat read or a
  *     line refill on a miss.
  *   - `s_uncached`: waits for the uncached read and stores it in `saved(0)`.
  *   - `s_replace`: receives the refill burst, rotating the per-bank write strobes each beat.
  *   - `s_save`: presents the `saved` instructions until neither the cpu nor the cache stalls.
  *
  * @param cacheConfig
  *   geometry of the cache (ways, sets, banks, widths, burst size)
  * @param config
  *   implicit cpu configuration; the statistic port exists only when `config.build` is false
  */
class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Module {
  val nway:            Int = cacheConfig.nway
  val nset:            Int = cacheConfig.nset
  val nbank:           Int = cacheConfig.nbank
  val ninst:           Int = cacheConfig.ninst // number of instructions fetched
  val bankOffsetWidth: Int = cacheConfig.bankOffsetWidth
  val bankWidth:       Int = cacheConfig.bankWidth
  val tagWidth:        Int = cacheConfig.tagWidth
  val indexWidth:      Int = cacheConfig.indexWidth
  val offsetWidth:     Int = cacheConfig.offsetWidth
  val io = IO(new Bundle {
    val cpu       = Flipped(new Cache_ICache())
    val axi       = new ICache_AXIInterface()
    val statistic = if (!config.build) Some(new ICacheStatistic()) else None
  })
  require(isPow2(ninst), "ninst must be power of 2")
  // * addr organization * //
  // ======================================
  // |        tag         |  index |offset|
  // |31                12|11     6|5    0|
  // ======================================
  // |          offset          |
  // | bank index | bank offset |
  // | 5        4 | 3         2 |
  // ============================

  val tlb_fill = RegInit(false.B)
  // * fsm * //
  val s_idle :: s_uncached :: s_replace :: s_save :: Nil = Enum(4)
  val state = RegInit(s_idle)

  // * nway * nset * //
  // * 128 bit for 4 inst * //
  // =========================================================
  // | valid | tag |  bank 0 | bank 1  |  bank 2 | bank 3 |
  // | 1     | 20  |   128   |   128   |   128   |  128   |
  // =========================================================
  // |                bank               |
  // | inst 0 | inst 1 | inst 2 | inst 3 |
  // |   32   |   32   |   32   |   32   |
  // =====================================
  val instperbank = bankWidth / 4 // number of instructions stored per bank
  val valid = RegInit(VecInit(Seq.fill(nset * nbank)(VecInit(Seq.fill(instperbank)(false.B)))))

  val data = Wire(Vec(nway, Vec(instperbank, UInt(DATA_WID.W))))
  // Registered copy of the tag RAM outputs.
  val tag = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W))))

  // * should choose next addr * //
  val should_next_addr = (state === s_idle && !tlb_fill) || (state === s_save)

  val data_raddr = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, bankOffsetWidth)
  val data_wstrb = RegInit(VecInit(Seq.fill(nway)(VecInit(Seq.fill(instperbank)(0.U(4.W))))))

  val tag_raddr = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, offsetWidth)
  val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B)))
  val tag_wdata = RegInit(0.U(tagWidth.W))

  // * lru * //
  val lru = RegInit(VecInit(Seq.fill(nset * nbank)(false.B)))

  // * itlb * //
  // tlb_fill is cleared by the single `when(tlb_fill)` just before the state machine below.
  io.cpu.tlb.fill           := tlb_fill
  io.cpu.tlb.icache_is_save := (state === s_save)

  // * fence * //
  val fence_index = io.cpu.fence_addr(indexWidth + offsetWidth - 1, offsetWidth)
  when(io.cpu.fence && !io.cpu.icache_stall && !io.cpu.cpu_stall) {
    valid(fence_index) := VecInit(Seq.fill(instperbank)(false.B))
  }

  // * replace set * //
  val rset = RegInit(0.U(6.W))

  // * virtual set * //
  val vset = io.cpu.addr(0)(indexWidth + offsetWidth - 1, offsetWidth)

  // * cache hit * //
  val tag_compare_valid   = VecInit(Seq.tabulate(nway)(i => tag(i) === io.cpu.tlb.tag && valid(vset)(i)))
  val cache_hit           = tag_compare_valid.contains(true.B)
  val cache_hit_available = cache_hit && io.cpu.tlb.translation_ok && !io.cpu.tlb.uncached
  // Selected way: 1 when way 1 matches, otherwise 0.
  val sel = tag_compare_valid(1)

  val bank_offset = io.cpu.addr(0)(log2Ceil(instperbank) + 1, 2)
  // Index of the last instruction slot in a bank (3 for the default 4 instructions per bank).
  val lastSlot = (instperbank - 1).U
  // Slot i returns the instruction i places after bank_offset, or 0 past the end of the bank.
  val inst = VecInit(
    Seq.tabulate(instperbank)(i => Mux(i.U <= (lastSlot - bank_offset), data(sel)(i.U + bank_offset), 0.U)),
  )

  val inst_valid = VecInit(Seq.tabulate(instperbank)(i => cache_hit_available && i.U <= (lastSlot - bank_offset)))

  val saved = RegInit(VecInit(Seq.fill(instperbank)(0.U.asTypeOf(new Bundle {
    val inst  = UInt(PC_WID.W)
    val valid = Bool()
  }))))

  val axi_cnt = Counter(cacheConfig.burstSize)

  // bank tag ram
  for { i <- 0 until nway; j <- 0 until instperbank } {
    val bank = Module(new SimpleDualPortRam(nset * nbank, DATA_WID, byteAddressable = true))
    bank.io.ren   := true.B
    bank.io.raddr := data_raddr
    data(i)(j)    := bank.io.rdata

    bank.io.wen   := data_wstrb(i)(j).orR
    bank.io.waddr := Cat(rset, axi_cnt.value(log2Ceil(cacheConfig.burstSize) - 1, log2Ceil(instperbank)))
    // Only the RAM whose slot matches the low bits of the beat counter gets the AXI data.
    bank.io.wdata := Mux(j.U === axi_cnt.value(log2Ceil(instperbank) - 1, 0), io.axi.r.bits.data, 0.U)
    bank.io.wstrb := data_wstrb(i)(j)
  }

  for { i <- 0 until ninst } {
    io.cpu.inst_valid(i) := Mux(state === s_idle && !tlb_fill, inst_valid(i), saved(i).valid) && io.cpu.req
    io.cpu.inst(i)       := Mux(state === s_idle && !tlb_fill, inst(i), saved(i).inst)
  }

  for { i <- 0 until nway } {
    val tag_bram = Module(new LUTRam(nset, tagWidth))
    tag_bram.io.raddr := tag_raddr
    tag(i)            := tag_bram.io.rdata

    tag_bram.io.wen   := tag_wstrb(i)
    tag_bram.io.waddr := rset
    tag_bram.io.wdata := tag_wdata
  }

  io.cpu.icache_stall := Mux(state === s_idle && !tlb_fill, (!cache_hit_available && io.cpu.req), state =/= s_save)

  val ar      = RegInit(0.U.asTypeOf(new AR()))
  val arvalid = RegInit(false.B)
  ar <> io.axi.ar.bits
  arvalid <> io.axi.ar.valid

  val r      = RegInit(0.U.asTypeOf(new R()))
  val rready = RegInit(false.B)
  r <> io.axi.r.bits
  rready <> io.axi.r.ready

  // A fill request lasts one cycle; s_idle below may set it again only while it is low.
  when(tlb_fill === true.B) {
    tlb_fill := false.B
  }

  switch(state) {
    is(s_idle) {
      when(tlb_fill) {
        when(!io.cpu.tlb.hit) {
          state          := s_save
          saved(0).inst  := 0.U
          saved(0).valid := true.B
        }
      }.elsewhen(io.cpu.req) {
        when(!io.cpu.tlb.translation_ok) {
          tlb_fill := true.B
        }.elsewhen(io.cpu.tlb.uncached) {
          state   := s_uncached
          ar.addr := io.cpu.tlb.pa
          ar.len  := 0.U(log2Ceil((nbank * bankWidth) / 4).W)
          ar.size := 2.U(bankOffsetWidth.W)
          arvalid := true.B
        }.elsewhen(!cache_hit) {
          state   := s_replace
          ar.addr := Cat(io.cpu.tlb.pa(31, 6), 0.U(6.W))
          ar.len  := 15.U(log2Ceil((nbank * bankWidth) / 4).W)
          ar.size := 2.U(bankOffsetWidth.W)
          arvalid := true.B

          rset := vset
          // Start with the strobe on slot 0; it is rotated on every refill beat.
          (0 until instperbank).foreach(i => data_wstrb(lru(vset))(i) := Mux(i.U === 0.U, 0xf.U, 0x0.U))
          tag_wstrb(lru(vset))   := true.B
          tag_wdata              := io.cpu.tlb.tag
          valid(vset)(lru(vset)) := true.B
          axi_cnt.reset()
        }.elsewhen(!io.cpu.icache_stall) {
          lru(vset) := ~sel
          when(io.cpu.cpu_stall) {
            state := s_save
            // NOTE(review): saved(0).inst is not written here and the saved words are taken from
            // data(sel)(i) rather than the offset-adjusted inst(i) — confirm this is intended.
            (1 until instperbank).foreach(i => saved(i).inst := data(sel)(i))
            (0 until instperbank).foreach(i => saved(i).valid := inst_valid(i))
          }
        }
      }
    }
    is(s_uncached) {
      when(io.axi.ar.valid) {
        when(io.axi.ar.ready) {
          arvalid := false.B
          rready  := true.B
        }
      }.elsewhen(io.axi.r.fire) {
        // * uncached not support burst transport * //
        state          := s_save
        saved(0).inst  := io.axi.r.bits.data
        saved(0).valid := true.B
        rready         := false.B
      }
    }
    is(s_replace) {
      when(io.axi.ar.valid) {
        when(io.axi.ar.ready) {
          arvalid := false.B
          rready  := true.B
        }
      }.elsewhen(io.axi.r.fire) {
        // * burst transport * //
        when(!io.axi.r.bits.last) {
          axi_cnt.inc()
          // Rotate the strobe vector by one slot.
          data_wstrb(lru(vset))(0) := data_wstrb(lru(vset))(instperbank - 1)
          (1 until instperbank).foreach(i => data_wstrb(lru(vset))(i) := data_wstrb(lru(vset))(i - 1))
        }.otherwise {
          rready                := false.B
          data_wstrb(lru(vset)) := 0.U.asTypeOf(Vec(instperbank, UInt(4.W)))
          tag_wstrb(lru(vset))  := false.B
        }
      }.elsewhen(!io.axi.r.ready) {
        state := s_idle
      }
    }
    is(s_save) {
      when(!io.cpu.cpu_stall && !io.cpu.icache_stall) {
        state := s_idle
        (0 until instperbank).foreach(i => saved(i).valid := false.B)
      }
    }
  }

  // ===----------------------------------------------------------------===
  // statistic
  // ===----------------------------------------------------------------===
  val req_cnt = RegInit(0.U(32.W))
  when(io.cpu.req) {
    req_cnt := req_cnt + 1.U
  }
  val hit_cnt = RegInit(0.U(32.W))
  when(io.cpu.req && cache_hit) {
    hit_cnt := hit_cnt + 1.U
  }
  if (!config.build) {
    io.statistic.get.request := req_cnt
    io.statistic.get.hit     := hit_cnt
  }
}
/** LUT ram for XPM, one port for read/write, one port for read.
  *
  * With `config.build` set, the storage is the `LUTRamIP` black box: port A is the write port and
  * port B the read port. Otherwise the storage is a register vector that is read combinationally.
  *
  * @param depth
  *   how many lines there are in the bank (must be a power of two)
  * @param width
  *   how wide in bits each line is
  * @param config
  *   implicit configuration to control generate ram for simulation or elaboration
  */
class LUTRam(depth: Int, width: Int)(implicit val config: CpuConfig) extends Module {
  require(isPow2(depth))
  val waddridth = log2Ceil(depth)
  val io = IO(new Bundle {
    val raddr = Input(UInt(waddridth.W))
    val rdata = Output(UInt(width.W))

    val waddr       = Input(UInt(waddridth.W))
    val wdata       = Input(UInt(width.W))
    val wen         = Input(Bool())
    val writeOutput = Output(UInt(width.W))
  })

  if (!config.build) {
    // Simulation model: register file, zero-initialised on reset.
    val bank = RegInit(VecInit(Seq.fill(depth)(0.U(width.W))))
    io.rdata       := bank(io.raddr)
    io.writeOutput := DontCare
    when(io.wen) {
      bank(io.waddr) := io.wdata
    }.otherwise {
      // While not writing, the write port echoes the line it addresses.
      io.writeOutput := bank(io.waddr)
    }
  } else {
    // Elaboration for the FPGA build: Xilinx distributed-RAM macro.
    val bank = Module(
      new LUTRamIP(
        wdataidth      = width,
        waddridth      = waddridth,
        byteWriteWidth = width,
        numberOfLines  = depth,
      ),
    )
    // Clocks and resets.
    bank.io.clka := clock
    bank.io.clkb := clock
    bank.io.rsta := reset
    bank.io.rstb := reset

    // Both ports are always enabled; the output register enables are tied low.
    bank.io.ena    := true.B
    bank.io.enb    := true.B
    bank.io.regcea := false.B
    bank.io.regceb := false.B

    // Port A: write.
    bank.io.addra  := io.waddr
    bank.io.wea    := io.wen
    bank.io.dina   := io.wdata
    io.writeOutput := DontCare

    // Port B: read.
    bank.io.addrb := io.raddr
    io.rdata      := bank.io.doutb
  }
}
/** XPM 2019.2 XPM_MEMORY_DPDISTRAM, at page 124 of UG953(2019.2) by default, this is initialized to
  * all 0
  *
  * Both read latencies are set to 0 and the clocking mode is `common_clock`.
  *
  * @param wdataidth
  *   : the size of the data to store in each line, in bits
  * @param waddridth
  *   : the width of request; must equal log2 of `numberOfLines` and lie in 1..20
  * @param byteWriteWidth
  *   : addressable size of write, in bits; `wdataidth` must be a multiple of it
  * @param numberOfLines
  *   : how many **lines** there are in the memory (the macro's MEMORY_SIZE, in bits, is
  *   `numberOfLines * wdataidth`)
  */
class LUTRamIP(wdataidth: Int, waddridth: Int, byteWriteWidth: Int, numberOfLines: Int)
    extends BlackBox(
      Map(
        "ADDR_WIDTH_A"       -> waddridth,
        "ADDR_WIDTH_B"       -> waddridth,
        "MEMORY_SIZE"        -> numberOfLines * wdataidth,
        "WRITE_DATA_WIDTH_A" -> wdataidth,
        "READ_DATA_WIDTH_A"  -> wdataidth,
        "READ_DATA_WIDTH_B"  -> wdataidth,
        "BYTE_WRITE_WIDTH_A" -> byteWriteWidth,
        "READ_LATENCY_A"     -> 0,
        "READ_LATENCY_B"     -> 0,
        "READ_RESET_VALUE_A" -> 0,
        "READ_RESET_VALUE_B" -> 0,
        "CLOCKING_MODE"      -> "common_clock",
      ),
    ) {
  override def desiredName: String = "xpm_memory_dpdistram"
  require(
    waddridth == log2Ceil(numberOfLines),
    "request width should be log 2 of number of lines to request all",
  )
  require(
    wdataidth % byteWriteWidth == 0,
    "data width should be a multiple of byte write width",
  )
  // Enforce both ends of the range named in the message (previously only the upper bound).
  require(waddridth >= 1 && waddridth <= 20, "request width should be 1 to 20")
  val io = IO(new Bundle {
    val clka = Input(Clock())
    val clkb = Input(Clock())
    val rsta = Input(Reset())
    val rstb = Input(Reset())

    val ena    = Input(Bool())
    val enb    = Input(Bool())
    val regcea = Input(Bool())
    val regceb = Input(Bool())

    val dina  = Input(UInt(wdataidth.W))
    val addra = Input(UInt(waddridth.W))
    val addrb = Input(UInt(waddridth.W))

    // One write-enable bit per byteWriteWidth-sized slice of the data word.
    val wea = Input(UInt((wdataidth / byteWriteWidth).W))

    val douta = Output(UInt(wdataidth.W))
    val doutb = Output(UInt(wdataidth.W))
  })
}
/** Read port: an address in, data of type `gen` out. The address is wide enough to index
  * `nset * nbank` lines of the implicit cache configuration.
  */
class ReadOnlyPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  private val addrBits = log2Ceil(cacheConfig.nset * cacheConfig.nbank)

  val addr = Input(UInt(addrBits.W))
  val data = Output(gen)
}

/** Write port with a single-bit enable. */
class WriteOnlyPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  private val addrBits = log2Ceil(cacheConfig.nset * cacheConfig.nbank)

  val addr = Input(UInt(addrBits.W))
  val en   = Input(Bool())
  val data = Input(gen)
}

/** Write port whose enable is a `bankWidth`-bit vector instead of a single bit. */
class WriteOnlyMaskPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  private val addrBits = log2Ceil(cacheConfig.nset * cacheConfig.nbank)

  val addr = Input(UInt(addrBits.W))
  val en   = Input(UInt(cacheConfig.bankWidth.W))
  val data = Input(gen)
}

/** Combined port: one address, a single-bit enable, write data in and read data out. */
class ReadWritePort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  private val addrBits = log2Ceil(cacheConfig.nset * cacheConfig.nbank)

  val addr  = Input(UInt(addrBits.W))
  val en    = Input(Bool())
  val wdata = Input(gen)
  val rdata = Output(gen)
}

/** Combined port with a `bankWidth`-bit write mask in place of the single-bit enable. */
class MaskedReadWritePort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  private val addrBits = log2Ceil(cacheConfig.nset * cacheConfig.nbank)

  val addr      = Input(UInt(addrBits.W))
  val writeMask = Input(UInt(cacheConfig.bankWidth.W))
  val wdata     = Input(gen)
  val rdata     = Output(gen)
}
+  * @param config
+  *   the implicit CPU configuration; when `config.build` is set the SimpleDualPortRamIP black box is used, otherwise a SyncReadMem model
+  */
+class SimpleDualPortRam(depth: Int, width: Int, byteAddressable: Boolean)(implicit
+  val config: CpuConfig,
+) extends Module {
+  require(isPow2(depth))
+  require(
+    width % 8 == 0 || !byteAddressable,
+    "if memory is byte addressable, then the data width must be a multiple of 8",
+  )
+  val waddridth = log2Ceil(depth) // address width shared by the read port and the write port
+
+  val io = IO(new Bundle {
+    val raddr = Input(UInt(waddridth.W))
+    val ren   = Input(Bool())
+    val rdata = Output(UInt(width.W))
+
+    val waddr = Input(UInt(waddridth.W))
+    val wen   = Input(Bool())
+    val wstrb = Input(UInt((if (byteAddressable) width / 8 else 1).W)) // one bit per byte, or a single bit for whole-line writes
+    val wdata = Input(UInt(width.W))
+  })
+
+  if (config.build) {
+    val memory = Module(
+      new SimpleDualPortRamIP(
+        wdataidth      = width,
+        byteWriteWidth = if (byteAddressable) 8 else width, // a full-width "byte" yields the single write-enable bit declared above
+        numberOfLines  = depth,
+        waddridth      = waddridth,
+      ),
+    )
+    memory.io.clka := clock // both ports run from this module's clock
+    memory.io.clkb := clock
+    memory.io.rstb := reset
+
+    memory.io.addra := io.waddr // port A of the IP is the write port
+    memory.io.ena   := io.wen
+    memory.io.dina  := io.wdata
+    memory.io.wea   := io.wstrb
+
+    memory.io.addrb  := io.raddr // port B of the IP is the read port
+    memory.io.enb    := io.ren
+    memory.io.regceb := false.B
+    io.rdata         := memory.io.doutb
+  } else {
+    assert( // only present in the SyncReadMem model: an enabled write must select at least one lane
+      io.wstrb.orR || !io.wen,
+      "when write port enable is high, write vector cannot be all 0",
+    )
+    if (byteAddressable) {
+      val bank = SyncReadMem(depth, Vec(width / 8, UInt(8.W))) // one Vec element per byte so wstrb can be used as a write mask
+      when(io.ren) {
+        io.rdata := bank(io.raddr).asTypeOf(io.rdata)
+      }.otherwise {
+        io.rdata := DontCare // read data is left unconstrained while the read port is idle
+      }
+      when(io.wen) {
+        bank.write(io.waddr, io.wdata.asTypeOf(Vec(width / 8, UInt(8.W))), io.wstrb.asBools)
+      }
+    } else {
+      val bank = SyncReadMem(depth, UInt(width.W))
+
+      when(io.ren) {
+        io.rdata := bank.read(io.raddr)
+      }.otherwise {
+        io.rdata := 0.U(width.W) // idle value sized from `width`, matching the declared width of io.rdata
+      }
+
+      when(io.wen) {
+        bank.write(io.waddr, io.wdata) // whole-line write; io.wstrb is only checked by the assert above
+      }
+    }
+  }
+}
diff --git a/chisel/playground/src/cache/memory/SimpleDualPortRamIP.scala
b/chisel/playground/src/cache/memory/SimpleDualPortRamIP.scala
new file mode 100644
index 0000000..b697a00
--- /dev/null
+++ b/chisel/playground/src/cache/memory/SimpleDualPortRamIP.scala
@@ -0,0 +1,68 @@
+package cache.memory
+
+import chisel3._
+import chisel3.util.log2Ceil
+
+/** simple dual port ram: black box emitted under the module name "xpm_memory_sdpram"
+  *
+  * @param wdataidth
+  *   : width of every data line, in bits
+  * @param byteWriteWidth
+  *   : how many bits each write-enable (mask) bit covers; must divide wdataidth
+  * @param numberOfLines
+  *   : how many lines of data are in the ram
+  * @param waddridth
+  *   : how wide is the request (to cover all lines); must equal log2Ceil(numberOfLines) and lie in 1 to 20
+  * @param memoryPrimitive
+  *   : which memory primitive to use: "auto", "block", "distributed" or "ultra"
+  */
+class SimpleDualPortRamIP(
+  wdataidth:       Int = 32,
+  byteWriteWidth:  Int = 8,
+  numberOfLines:   Int,
+  waddridth:       Int,
+  memoryPrimitive: String = "block",
+) extends BlackBox(
+      Map(
+        "ADDR_WIDTH_A"       -> waddridth,
+        "ADDR_WIDTH_B"       -> waddridth,
+        "WRITE_DATA_WIDTH_A" -> wdataidth,
+        "READ_DATA_WIDTH_B"  -> wdataidth,
+        "BYTE_WRITE_WIDTH_A" -> byteWriteWidth,
+        "CLOCKING_MODE"      -> "common_clock",
+        "READ_LATENCY_B"     -> 1,
+        "MEMORY_SIZE"        -> numberOfLines * wdataidth, // total capacity in bits
+        "MEMORY_PRIMITIVE"   -> memoryPrimitive,
+      ),
+    ) {
+  override def desiredName: String = "xpm_memory_sdpram"
+  require(waddridth >= 1 && waddridth <= 20, "request width should be 1 to 20") // check both bounds named in the message
+  require(
+    wdataidth - (wdataidth / byteWriteWidth) * byteWriteWidth == 0,
+    "data width should be a multiple of byte write width",
+  )
+  require(
+    List("auto", "block", "distributed", "ultra").contains(memoryPrimitive),
+    "memory primitive should be auto, block ram, dist ram or ultra ram",
+  )
+  require(
+    waddridth == log2Ceil(numberOfLines),
+    "request width should be log 2 of number of lines to request all",
+  )
+  val io = IO(new Bundle {
+    // clock and reset
+    val clka = Input(Clock())
+    val clkb = Input(Clock())
+    val rstb = Input(Reset())
+
+    val addra = Input(UInt(waddridth.W))
+    val dina  = Input(UInt(wdataidth.W))
+    val ena   = Input(Bool())
+    val wea   = Input(UInt((wdataidth /
byteWriteWidth).W))
+
+    val addrb  = Input(UInt(waddridth.W))
+    val enb    = Input(Bool())
+    val regceb = Input(Bool())
+    val doutb  = Output(UInt(wdataidth.W))
+  })
+}
diff --git a/chisel/playground/src/ctrl/Ctrl.scala b/chisel/playground/src/ctrl/Ctrl.scala
new file mode 100644
index 0000000..0a7f052
--- /dev/null
+++ b/chisel/playground/src/ctrl/Ctrl.scala
@@ -0,0 +1,48 @@
+package cpu.ctrl
+
+import chisel3._
+import chisel3.util._
+import cpu.defines._
+import cpu.defines.Const._
+import cpu.CpuConfig
+
+class Ctrl(implicit val config: CpuConfig) extends Module { // derives every stage's allow_to_go / do_flush from stall, branch and flush requests
+  val io = IO(new Bundle { // each *Ctrl bundle declares directions from the unit's side, hence Flipped here
+    val cacheCtrl     = Flipped(new CacheCtrl())
+    val fetchUnit     = Flipped(new FetchUnitCtrl())
+    val instFifo      = Flipped(new InstFifoCtrl())
+    val decoderUnit   = Flipped(new DecoderUnitCtrl())
+    val executeUnit   = Flipped(new ExecuteCtrl())
+    val memoryUnit    = Flipped(new MemoryCtrl())
+    val writeBackUnit = Flipped(new WriteBackCtrl())
+  })
+
+  val inst0_lw_stall = (io.executeUnit.inst(0).mem_wreg) && // execute slot 0 has mem_wreg set and decoder inst0 reads its reg_waddr
+    (io.decoderUnit.inst0.src1.ren && io.decoderUnit.inst0.src1.raddr === io.executeUnit.inst(0).reg_waddr ||
+      io.decoderUnit.inst0.src2.ren && io.decoderUnit.inst0.src2.raddr === io.executeUnit.inst(0).reg_waddr)
+  val inst1_lw_stall = (io.executeUnit.inst(1).mem_wreg) && // same check against execute slot 1 (still decoder inst0)
+    (io.decoderUnit.inst0.src1.ren && io.decoderUnit.inst0.src1.raddr === io.executeUnit.inst(1).reg_waddr ||
+      io.decoderUnit.inst0.src2.ren && io.decoderUnit.inst0.src2.raddr === io.executeUnit.inst(1).reg_waddr)
+  val lw_stall = inst0_lw_stall || inst1_lw_stall
+  // TODO: the stall signal here may be incorrect
+  val longest_stall = io.executeUnit.fu_stall || io.cacheCtrl.iCache_stall || io.cacheCtrl.dCache_stall
+
+  io.fetchUnit.allow_to_go     := !io.cacheCtrl.iCache_stall // fetch is held only by the iCache stall
+  io.decoderUnit.allow_to_go   := !(lw_stall || longest_stall) // decode is additionally held by lw_stall
+  io.executeUnit.allow_to_go   := !longest_stall
+  io.memoryUnit.allow_to_go    := !longest_stall
+  io.writeBackUnit.allow_to_go := !longest_stall || io.memoryUnit.flush_req // a memory-stage flush request overrides the stall here
+
+  io.fetchUnit.do_flush     := false.B // fetch is never flushed from this module
+  io.decoderUnit.do_flush   := io.memoryUnit.flush_req || io.executeUnit.branch || io.decoderUnit.branch
+  io.executeUnit.do_flush   := io.memoryUnit.flush_req || io.executeUnit.branch
+  io.memoryUnit.do_flush    := io.memoryUnit.flush_req
+  io.writeBackUnit.do_flush := false.B // write-back is never flushed from this module
+
+  io.instFifo.delay_slot_do_flush := io.memoryUnit.flush_req
+
+  io.executeUnit.fu.do_flush    := io.memoryUnit.do_flush // the fu sub-bundle mirrors the memory stage's control signals
+  io.executeUnit.fu.eret        := io.memoryUnit.eret
+  io.executeUnit.fu.allow_to_go := io.memoryUnit.allow_to_go
+
+}
diff --git a/chisel/playground/src/defines/Bundles.scala b/chisel/playground/src/defines/Bundles.scala
new file mode 100644
index 0000000..2d4bafb
--- /dev/null
+++ b/chisel/playground/src/defines/Bundles.scala
@@ -0,0 +1,333 @@
+package cpu.defines
+
+import chisel3._
+import chisel3.util._
+import cpu.defines.Const._
+import cpu.CpuConfig
+
+class TlbEntry extends Bundle { // pfn/c/d/v are 2-element Vecs — presumably the even/odd page pair of a MIPS TLB entry, confirm
+  val vpn2 = UInt(VPN2_WID.W)
+  val asid = UInt(ASID_WID.W)
+  val g    = Bool()
+  val pfn  = Vec(2, UInt(PFN_WID.W))
+  val c    = Vec(2, Bool())
+  val d    = Vec(2, Bool())
+  val v    = Vec(2, Bool())
+}
+
+class ExceptionInfo extends Bundle {
+  val flush_req  = Bool()
+  val tlb_refill = Bool()
+  val eret       = Bool()
+  val badvaddr   = UInt(PC_WID.W)
+  val bd         = Bool()
+  val excode     = UInt(EXCODE_WID.W)
+}
+
+class SrcInfo extends Bundle {
+  val src1_data = UInt(DATA_WID.W)
+  val src2_data = UInt(DATA_WID.W)
+}
+
+class RdInfo extends Bundle {
+  val wdata = UInt(DATA_WID.W)
+}
+
+class InstInfo extends Bundle {
+  val inst_valid  = Bool()
+  val reg1_ren    = Bool()
+  val reg1_raddr  = UInt(REG_ADDR_WID.W)
+  val reg2_ren    = Bool()
+  val reg2_raddr  = UInt(REG_ADDR_WID.W)
+  val fusel       = UInt(FU_SEL_WID.W)
+  val op          = UInt(OP_WID.W)
+  val reg_wen     = Bool()
+  val reg_waddr   = UInt(REG_ADDR_WID.W)
+  val imm32       = UInt(DATA_WID.W)
+  val cp0_addr    = UInt(CP0_ADDR_WID.W)
+  val dual_issue  = Bool()
+  val whilo       = Bool()
+  val rmem        = Bool()
+  val wmem        = Bool()
+  val mul         = Bool()
+  val div         = Bool()
+  val branch_link = Bool()
+  val ifence      = Bool()
+
val dfence = Bool() + val tlbfence = Bool() + val mem_addr = UInt(DATA_ADDR_WID.W) + val mem_wreg = Bool() + val inst = UInt(INST_WID.W) +} + +class MemRead extends Bundle { + val mem_wreg = Bool() + val reg_waddr = UInt(REG_ADDR_WID.W) +} + +class SrcReadSignal extends Bundle { + val ren = Bool() + val raddr = UInt(REG_ADDR_WID.W) +} + +class CacheCtrl extends Bundle { + val iCache_stall = Output(Bool()) + val dCache_stall = Output(Bool()) +} + +class FetchUnitCtrl extends Bundle { + val allow_to_go = Input(Bool()) + val do_flush = Input(Bool()) +} + +class InstFifoCtrl extends Bundle { + val delay_slot_do_flush = Input(Bool()) + + val has2insts = Output(Bool()) +} + +class DecoderUnitCtrl extends Bundle { + val inst0 = Output(new Bundle { + val src1 = new SrcReadSignal() + val src2 = new SrcReadSignal() + }) + val branch = Output(Bool()) + + val allow_to_go = Input(Bool()) + val do_flush = Input(Bool()) +} + +class ExecuteFuCtrl extends Bundle { + val allow_to_go = Input(Bool()) + val do_flush = Input(Bool()) + val eret = Input(Bool()) +} + +class ExecuteCtrl(implicit val config: CpuConfig) extends Bundle { + val inst = Output(Vec(config.fuNum, new MemRead())) + val fu_stall = Output(Bool()) + val branch = Output(Bool()) + + val allow_to_go = Input(Bool()) + val do_flush = Input(Bool()) + + val fu = new ExecuteFuCtrl() +} + +class MemoryCtrl extends Bundle { + val flush_req = Output(Bool()) + val eret = Output(Bool()) + + val allow_to_go = Input(Bool()) + val do_flush = Input(Bool()) +} + +class WriteBackCtrl extends Bundle { + val allow_to_go = Input(Bool()) + val do_flush = Input(Bool()) +} + +class Tlb1InfoI extends Bundle { + val invalid = Bool() + val refill = Bool() +} + +class Tlb1InfoD extends Tlb1InfoI { + val modify = Bool() +} + +class Tlb2Info extends Bundle { + val vpn2 = Input(UInt(19.W)) + val found = Output(Bool()) + val entry = Output(new TlbEntry()) +} + +class Tlb_ICache extends Bundle { + val fill = Input(Bool()) + val icache_is_save = 
Input(Bool()) + val uncached = Output(Bool()) + + val translation_ok = Output(Bool()) + val hit = Output(Bool()) + val tag = Output(UInt(20.W)) + val pa = Output(UInt(32.W)) +} + +class Tlb_DCache extends Bundle { + val fill = Input(Bool()) + val dcache_is_idle = Input(Bool()) + val dcache_is_save = Input(Bool()) + val uncached = Output(Bool()) + val tlb1_ok = Output(Bool()) + + val translation_ok = Output(Bool()) + val hit = Output(Bool()) + val tag = Output(UInt(20.W)) + val pa = Output(UInt(32.W)) +} + +// cpu to icache +class Cache_ICache(implicit + val config: CpuConfig, +) extends Bundle { + // read inst request from cpu + val req = Output(Bool()) + val addr = Output(Vec(config.instFetchNum, UInt(32.W))) // virtual address and next virtual address + + // read inst result + val inst = Input(Vec(config.instFetchNum, UInt(32.W))) + val inst_valid = Input(Vec(config.instFetchNum, Bool())) + + // control + val cpu_stall = Output(Bool()) + val icache_stall = Input(Bool()) + + val tlb = new Tlb_ICache() + + val fence = Output(Bool()) + val fence_addr = Output(UInt(32.W)) +} + +// cpu to dcache +class Cache_DCache extends Bundle { + val cpu_stall = Output(Bool()) + val dcache_stall = Input(Bool()) + + val execute_addr = Output(UInt(32.W)) + // 连接 mem unit + val rdata = Input(UInt(32.W)) + val en = Output(Bool()) + val wen = Output(UInt(4.W)) + val rlen = Output(UInt(2.W)) + val wdata = Output(UInt(32.W)) + val addr = Output(UInt(32.W)) + + val tlb = new Tlb_DCache() + + val fence = Output(Bool()) + val fence_addr = Output(UInt(32.W)) +} + +// axi +// master + +class AR extends Bundle { + val addr = UInt(32.W) + val len = UInt(8.W) + val size = UInt(3.W) +} + +class R extends Bundle { + val data = UInt(32.W) + val last = Bool() +} + +class AW extends Bundle { + val addr = UInt(32.W) + val len = UInt(8.W) + val size = UInt(3.W) +} + +class W extends Bundle { + val data = UInt(32.W) + val strb = UInt(4.W) + val last = Bool() +} + +class ICache_AXIInterface extends 
Bundle { + val ar = Decoupled(new AR()) + val r = Flipped(Decoupled(new R())) +} + +class DCache_AXIInterface extends ICache_AXIInterface { + val aw = Decoupled(new AW()) + + val w = Decoupled(new W()) + + val b = Flipped(Decoupled()) +} + +class Cache_AXIInterface extends Bundle { + // axi read channel + val icache = new ICache_AXIInterface() + val dcache = new DCache_AXIInterface() +} + +// AXI read address channel +class AXI_AR extends Bundle { + val id = UInt(4.W) // transaction ID + val addr = UInt(32.W) // address + val len = UInt(8.W) // burst length + val size = UInt(3.W) // transfer size + val burst = UInt(2.W) // burst type + val lock = UInt(2.W) // lock type + val cache = UInt(4.W) // cache type + val prot = UInt(3.W) // protection type +} + +// AXI read data channel +class AXI_R extends Bundle { + val id = UInt(4.W) // transaction ID + val data = UInt(32.W) // read data + val resp = UInt(2.W) // response type + val last = Bool() // last beat of burst +} + +// AXI write address channel +class AXI_AW extends Bundle { + val id = UInt(4.W) // transaction ID + val addr = UInt(32.W) // address + val len = UInt(8.W) // burst length + val size = UInt(3.W) // transfer size + val burst = UInt(2.W) // burst type + val lock = UInt(2.W) // lock type + val cache = UInt(4.W) // cache type + val prot = UInt(3.W) // protection type +} + +// AXI write data channel +class AXI_W extends Bundle { + val id = UInt(4.W) // transaction ID + val data = UInt(32.W) // write data + val strb = UInt(4.W) // byte enable + val last = Bool() // last beat of burst +} + +// AXI write response channel +class AXI_B extends Bundle { + val id = UInt(4.W) // transaction ID + val resp = UInt(2.W) // response type +} + +// AXI interface +class AXI extends Bundle { + val ar = Decoupled(new AXI_AR()) // read address channel + val r = Flipped(Decoupled(new AXI_R())) // read data channel + val aw = Decoupled(new AXI_AW()) // write address channel + val w = Decoupled(new AXI_W()) // write data 
channel + val b = Flipped(Decoupled(new AXI_B())) // write response channel +} + +class DEBUG(implicit config: CpuConfig) extends Bundle { + val wb_pc = Output(UInt(32.W)) + val wb_rf_wen = Output(UInt(4.W)) + val wb_rf_wnum = Output(UInt(5.W)) + val wb_rf_wdata = Output(UInt(32.W)) +} + +class Ctrl_Sram extends Bundle { + val do_flush = Output(Bool()) +} + +class Ctrl_Stage extends Bundle { + val do_flush = Output(Bool()) + val after_ex = Output(Bool()) +} + +class Sram_Ctrl extends Bundle { + val sram_discard = Output(UInt(2.W)) +} + +class Pipeline_Ctrl extends Bundle { + val ex = Output(Bool()) +} diff --git a/chisel/playground/src/defines/Const.scala b/chisel/playground/src/defines/Const.scala new file mode 100644 index 0000000..3430d66 --- /dev/null +++ b/chisel/playground/src/defines/Const.scala @@ -0,0 +1,254 @@ +package cpu.defines + +import chisel3._ +import chisel3.util._ +import cpu.defines.Instructions +import cpu.CpuConfig + +trait Constants { + val config = new CpuConfig + // 全局 + val PC_WID = 32 + val PC_INIT = "hbfc00000".U(PC_WID.W) + + val EXT_INT_WID = 6 + + val WRITE_ENABLE = true.B + val WRITE_DISABLE = false.B + val READ_ENABLE = true.B + val READ_DISABLE = false.B + val INST_VALID = false.B + val INST_INVALID = true.B + val SINGLE_ISSUE = false.B + val DUAL_ISSUE = true.B + + // AluOp + private val OP_NUM = 77 + val OP_WID = log2Ceil(OP_NUM) + // NOP + val EXE_NOP = 0.U(OP_WID.W) + // 位操作 + val EXE_AND = 1.U(OP_WID.W) + val EXE_OR = 2.U(OP_WID.W) + val EXE_XOR = 3.U(OP_WID.W) + val EXE_NOR = 4.U(OP_WID.W) + // 移位 + val EXE_SLL = 5.U(OP_WID.W) + val EXE_SLLV = 6.U(OP_WID.W) + val EXE_SRL = 7.U(OP_WID.W) + val EXE_SRLV = 8.U(OP_WID.W) + val EXE_SRA = 9.U(OP_WID.W) + val EXE_SRAV = 10.U(OP_WID.W) + // Move + val EXE_MOVZ = 11.U(OP_WID.W) + val EXE_MOVN = 12.U(OP_WID.W) + // HILO + val EXE_MFHI = 13.U(OP_WID.W) + val EXE_MTHI = 14.U(OP_WID.W) + val EXE_MFLO = 15.U(OP_WID.W) + val EXE_MTLO = 16.U(OP_WID.W) + // CP0 Move + val EXE_MFC0 = 
17.U(OP_WID.W) + val EXE_MTC0 = 18.U(OP_WID.W) + // 比较 + val EXE_SLT = 19.U(OP_WID.W) + val EXE_SLTU = 20.U(OP_WID.W) + // 算数 + val EXE_ADD = 21.U(OP_WID.W) + val EXE_ADDU = 22.U(OP_WID.W) + val EXE_SUB = 23.U(OP_WID.W) + val EXE_SUBU = 24.U(OP_WID.W) + val EXE_CLZ = 25.U(OP_WID.W) + val EXE_CLO = 26.U(OP_WID.W) + val EXE_MULT = 27.U(OP_WID.W) + val EXE_MULTU = 28.U(OP_WID.W) + val EXE_MUL = 29.U(OP_WID.W) + val EXE_MADD = 30.U(OP_WID.W) + val EXE_MADDU = 31.U(OP_WID.W) + val EXE_MSUB = 32.U(OP_WID.W) + val EXE_MSUBU = 33.U(OP_WID.W) + val EXE_DIV = 34.U(OP_WID.W) + val EXE_DIVU = 35.U(OP_WID.W) + // 跳转 + val EXE_J = 36.U(OP_WID.W) + val EXE_JAL = 37.U(OP_WID.W) + val EXE_JALR = 38.U(OP_WID.W) + val EXE_JR = 39.U(OP_WID.W) + val EXE_BEQ = 40.U(OP_WID.W) + val EXE_BGEZ = 41.U(OP_WID.W) + val EXE_BGEZAL = 42.U(OP_WID.W) + val EXE_BGTZ = 43.U(OP_WID.W) + val EXE_BLEZ = 44.U(OP_WID.W) + val EXE_BLTZ = 45.U(OP_WID.W) + val EXE_BLTZAL = 46.U(OP_WID.W) + val EXE_BNE = 47.U(OP_WID.W) + // 访存 + val EXE_LB = 48.U(OP_WID.W) + val EXE_LBU = 49.U(OP_WID.W) + val EXE_LH = 50.U(OP_WID.W) + val EXE_LHU = 51.U(OP_WID.W) + val EXE_LL = 52.U(OP_WID.W) + val EXE_LW = 53.U(OP_WID.W) + val EXE_LWL = 54.U(OP_WID.W) + val EXE_LWR = 55.U(OP_WID.W) + val EXE_SB = 56.U(OP_WID.W) + val EXE_SC = 57.U(OP_WID.W) + val EXE_SH = 58.U(OP_WID.W) + val EXE_SW = 59.U(OP_WID.W) + val EXE_SWL = 60.U(OP_WID.W) + val EXE_SWR = 61.U(OP_WID.W) + // Trap + val EXE_TEQ = 62.U(OP_WID.W) + val EXE_TGE = 63.U(OP_WID.W) + val EXE_TGEU = 64.U(OP_WID.W) + val EXE_TLT = 65.U(OP_WID.W) + val EXE_TLTU = 66.U(OP_WID.W) + val EXE_TNE = 67.U(OP_WID.W) + // 例外 + val EXE_SYSCALL = 68.U(OP_WID.W) + val EXE_BREAK = 69.U(OP_WID.W) + val EXE_ERET = 70.U(OP_WID.W) + val EXE_WAIT = 71.U(OP_WID.W) + // tlb + val EXE_TLBP = 72.U(OP_WID.W) + val EXE_TLBR = 73.U(OP_WID.W) + val EXE_TLBWI = 74.U(OP_WID.W) + val EXE_TLBWR = 75.U(OP_WID.W) + // cache + val EXE_CACHE = 76.U(OP_WID.W) + + // FUSel + val FU_SEL_NUM = 8 + val FU_SEL_WID = 
log2Ceil(FU_SEL_NUM) + + val FU_ALU = 0.U(FU_SEL_WID.W) + val FU_MEM = 1.U(FU_SEL_WID.W) + val FU_BR = 2.U(FU_SEL_WID.W) + val FU_EX = 3.U(FU_SEL_WID.W) + val FU_MTHILO = 4.U(FU_SEL_WID.W) + val FU_MFHILO = 5.U(FU_SEL_WID.W) + val FU_MUL = 6.U(FU_SEL_WID.W) + val FU_DIV = 7.U(FU_SEL_WID.W) + + // div + val DIV_CTRL_WID = 2 + val DIV_FREE = 0.U(DIV_CTRL_WID.W) + val DIV_BY_ZERO = 1.U(DIV_CTRL_WID.W) + val DIV_ON = 2.U(DIV_CTRL_WID.W) + val DIV_END = 3.U(DIV_CTRL_WID.W) + val DIV_RESULT_READY = true.B + val DIV_RESULT_NOT_READY = false.B + val DIV_START = true.B + val DIV_STOP = false.B + + // inst rom + val INST_WID = 32 + + // data ram + val DATA_ADDR_WID = 32 + + // GPR RegFile + val AREG_NUM = 32 + val REG_ADDR_WID = 5 + val DATA_WID = 32 + val HILO_WID = 64 + + // CP0寄存器 + // CP0 Register (5.w), Select (3.w) + val CP0_INDEX_ADDR = "b00000_000".U(8.W) // 0,0 + val CP0_RANDOM_ADDR = "b00001_000".U(8.W) // 1,0 + val CP0_ENTRYLO0_ADDR = "b00010_000".U(8.W) // 2,0 + val CP0_ENTRYLO1_ADDR = "b00011_000".U(8.W) // 3,0 + val CP0_CONTEXT_ADDR = "b00100_000".U(8.W) // 4,0 + // val CP0_CONTEXT_CONFIG_ADDR = "b00100_001".U(8.W) // 4,1 + // val CP0_USER_LOCAL_ADDR = "b00100_010".U(8.W) // 4,2 + val CP0_PAGE_MASK_ADDR = "b00101_000".U(8.W) // 5,0 + // val CP0_PAGE_GRAIN_ADDR = "b00101_001".U(8.W) // 5,1 + val CP0_WIRED_ADDR = "b00110_000".U(8.W) // 6,0 + // val CP0_HWRENA_ADDR = "b00111_000".U(8.W) // 7,0 + val CP0_BADV_ADDR = "b01000_000".U(8.W) // 8,0 + val CP0_COUNT_ADDR = "b01001_000".U(8.W) // 9,0 (sel保留 6or7) + val CP0_ENTRYHI_ADDR = "b01010_000".U(8.W) // 10,0 + val CP0_COMPARE_ADDR = "b01011_000".U(8.W) // 11,0 (sel保留 6or7) + val CP0_STATUS_ADDR = "b01100_000".U(8.W) // 12,0 + // val CP0_INTCTL_ADDR = "b01100_001".U(8.W) // 12,1 + // val CP0_SRSCTL_ADDR = "b01100_010".U(8.W) // 12,2 + // val CP0_SRSMAP_ADDR = "b01100_011".U(8.W) // 12,3 + val CP0_CAUSE_ADDR = "b01101_000".U(8.W) // 13,0 + val CP0_EPC_ADDR = "b01110_000".U(8.W) // 14,0 + val CP0_PRID_ADDR = 
"b01111_000".U(8.W) // 15,0 + val CP0_EBASE_ADDR = "b01111_001".U(8.W) // 15,1 + // val CP0_CDMMBASE_ADDR = "b01111_010".U(8.W) // 15,2 + // val CP0_CMGCRBASE_ADDR = "b01111_011".U(8.W) // 15,3 + val CP0_CONFIG_ADDR = "b10000_000".U(8.W) // 16,0 + val CP0_CONFIG1_ADDR = "b10000_001".U(8.W) // 16,1 + // val CP0_CONFIG2_ADDR = "b10000_010".U(8.W) // 16,2 + // val CP0_CONFIG3_ADDR = "b10000_011".U(8.W) // 16,3 + // val CP0_CONFIG4_ADDR = "b10000_100".U(8.W) // 16,4 (sel保留 6or7) + // val CP0_LOAD_LINKED_ADDR = "b10001_000".U(8.W) // 17,0 + val CP0_TAGLO_ADDR = "b11100_000".U(8.W) // 28,0 + val CP0_TAGHI_ADDR = "b11101_000".U(8.W) // 29,0 + val CP0_ERROR_EPC_ADDR = "b11110_000".U(8.W) // 30,0 + + val CP0_ADDR_WID = 8 + + val PTEBASE_WID = 9 + + // 例外类型 + val EXCODE_WID = 5 + + val EX_NO = 0.U(EXCODE_WID.W) // 无异常 + val EX_INT = 1.U(EXCODE_WID.W) // 中断异常 + val EX_MOD = 2.U(EXCODE_WID.W) // TLB 条目修改异常 + val EX_TLBL = 3.U(EXCODE_WID.W) // TLB 非法取指令或访问异常 + val EX_TLBS = 4.U(EXCODE_WID.W) // TLB 非法存储访问异常 + val EX_ADEL = 5.U(EXCODE_WID.W) // 地址未对齐异常(取指令或访问异常) + val EX_ADES = 6.U(EXCODE_WID.W) // 地址未对齐异常(存储访问异常) + val EX_SYS = 7.U(EXCODE_WID.W) // 系统调用异常 + val EX_BP = 8.U(EXCODE_WID.W) // 断点异常 + val EX_RI = 9.U(EXCODE_WID.W) // 保留指令异常 + val EX_CPU = 10.U(EXCODE_WID.W) // 协处理器不可用异常 + val EX_OV = 11.U(EXCODE_WID.W) // 算术溢出异常 + + val EXC_INT = "h00".U(EXCODE_WID.W) // 中断异常 + val EXC_MOD = "h01".U(EXCODE_WID.W) // TLB 条目修改异常 + val EXC_TLBL = "h02".U(EXCODE_WID.W) // TLB 非法取指令或访问异常 + val EXC_TLBS = "h03".U(EXCODE_WID.W) // TLB 非法存储访问异常 + val EXC_ADEL = "h04".U(EXCODE_WID.W) // 地址未对齐异常(取指令或访问异常) + val EXC_ADES = "h05".U(EXCODE_WID.W) // 地址未对齐异常(存储访问异常) + val EXC_SYS = "h08".U(EXCODE_WID.W) // 系统调用异常 + val EXC_BP = "h09".U(EXCODE_WID.W) // 断点异常 + val EXC_RI = "h0a".U(EXCODE_WID.W) // 保留指令异常 + val EXC_CPU = "h0b".U(EXCODE_WID.W) // 协处理器不可用异常 + val EXC_OV = "h0c".U(EXCODE_WID.W) // 算术溢出异常 + val EXC_NO = "h1f".U(EXCODE_WID.W) // 无异常 + + val EX_ENTRY = "h_bfc00380".U(32.W) + val 
EX_TLB_REFILL_ENTRY = "h_bfc00200".U(32.W)
+
+  // TLB MMU
+  val TLB_NUM  = if (config.build) 8 else 32 // for sys 32, other 8
+  val PFN_WID  = 20
+  val C_WID    = 3
+  val ASID_WID = 8
+  val VPN2_WID = 19
+}
+trait OptionConst {
+
+  // Write-register target (Write Register Address type)
+  val WRA_T1  = 0.U(2.W) // take inst(15,11)
+  val WRA_T2  = 1.U(2.W) // take inst(20,16)
+  val WRA_T3  = 2.U(2.W) // take "b11111", i.e. register 31
+  val WRA_X   = 0.U(2.W) // not care
+  val AREG_31 = "b11111".U(5.W)
+
+  // Immediate types
+  private val IL = 3
+  val IMM_N   = 0.U(IL.W)
+  val IMM_LSE = 1.U(IL.W) // inst(15,0) as the low 16 bits, sign-extended; used by ADDI, ADDIU, SLTI and SLTIU
+  val IMM_LZE = 2.U(IL.W) // inst(15,0) as the low 16 bits, zero-extended; used by the bitwise instructions
+  val IMM_HZE = 3.U(IL.W) // inst(15,0) as the high 16 bits, zero-extended; used by LUI (is this one necessary?)
+  val IMM_SHT = 4.U(IL.W) // inst(10,6) as the low 5 bits, extension is don't-care; used by SLL, SRL, SRA
+}
+
+object Const extends Constants with Instructions with OptionConst
diff --git a/chisel/playground/src/defines/Cp0Bundles.scala b/chisel/playground/src/defines/Cp0Bundles.scala
new file mode 100644
index 0000000..0defaba
--- /dev/null
+++ b/chisel/playground/src/defines/Cp0Bundles.scala
@@ -0,0 +1,128 @@
+package cpu.defines
+
+import chisel3._
+import chisel3.util._
+import cpu.defines.Const._
+
+class Cp0Index extends Bundle { // 1 + (32 - 1 - log2Ceil(TLB_NUM)) + log2Ceil(TLB_NUM) = 32 bits
+  val p     = Bool()
+  val blank = UInt((32 - 1 - log2Ceil(TLB_NUM)).W)
+  val index = UInt(log2Ceil(TLB_NUM).W)
+}
+
+class Cp0Random extends Bundle { // padding plus index field = 32 bits
+  val blank  = UInt((32 - log2Ceil(TLB_NUM)).W)
+  val random = UInt(log2Ceil(TLB_NUM).W)
+}
+
+class Cp0EntryLo extends Bundle { // `fill` pads pfn, c and the three flag bits up to 32 bits
+  val fill = UInt((32 - PFN_WID - C_WID - 3).W)
+  val pfn  = UInt(PFN_WID.W)
+  val c    = UInt(C_WID.W)
+  val d    = Bool()
+  val v    = Bool()
+  val g    = Bool()
+}
+
+class Cp0Context extends Bundle { // ptebase and badvpn2 on top, `blank` pads the low bits to 32
+  val ptebase = UInt(PTEBASE_WID.W)
+  val badvpn2 = UInt(VPN2_WID.W)
+  val blank   = UInt((32 - PTEBASE_WID - VPN2_WID).W)
+}
+
+class Cp0Wired extends Bundle { // padding plus `wired` = 32 bits, the same layout as Cp0Random
+  val blank = UInt((32 - log2Ceil(TLB_NUM)).W)
+  val wired = UInt(log2Ceil(TLB_NUM).W)
+}
+
+class Cp0BadVAddr extends Bundle {
+  val
badvaddr = UInt(PC_WID.W) +} + +class Cp0Count extends Bundle { + val count = UInt(DATA_WID.W) +} + +class Cp0EntryHi extends Bundle { + val vpn2 = UInt(VPN2_WID.W) + val blank = UInt((32 - VPN2_WID - ASID_WID).W) + val asid = UInt(ASID_WID.W) +} + +class Cp0Compare extends Bundle { + val compare = UInt(DATA_WID.W) +} + +class Cp0Status extends Bundle { + val blank3 = UInt(3.W) + val cu0 = Bool() + val blank2 = UInt(5.W) + val bev = Bool() + val blank1 = UInt(6.W) + val im = UInt(8.W) + val blank0 = UInt(3.W) + val um = Bool() + val r0 = Bool() + val erl = Bool() + val exl = Bool() + val ie = Bool() +} + +class Cp0Cause extends Bundle { + val bd = Bool() + val blank3 = UInt(7.W) + val iv = Bool() + val blank2 = UInt(7.W) + val ip = UInt(8.W) + val blank1 = Bool() + val excode = UInt(5.W) + val blank0 = UInt(2.W) +} + +class Cp0Epc extends Bundle { + val epc = UInt(PC_WID.W) +} + +class Cp0Ebase extends Bundle { + val fill = Bool() + val blank1 = Bool() + val ebase = UInt(18.W) + val blank0 = UInt(2.W) + val cpuNum = UInt(10.W) +} + +class Cp0Config extends Bundle { + val m = Bool() + val k23 = UInt(3.W) + val ku = UInt(3.W) + val impl = UInt(9.W) + val be = Bool() + val at = UInt(2.W) + val ar = UInt(3.W) + val mt = UInt(3.W) + val blank = UInt(3.W) + val vi = Bool() + val k0 = UInt(3.W) +} + +class Cp0Config1 extends Bundle { + val m = Bool() + val ms = UInt(6.W) + val is = UInt(3.W) + val il = UInt(3.W) + val ia = UInt(3.W) + val ds = UInt(3.W) + val dl = UInt(3.W) + val da = UInt(3.W) + val c2 = Bool() + val md = Bool() + val pc = Bool() + val wr = Bool() + val ca = Bool() + val ep = Bool() + val fp = Bool() +} + +class Cp0ErrorEpc extends Bundle { + val errorEpc = UInt(PC_WID.W) +} diff --git a/chisel/playground/src/defines/Instructions.scala b/chisel/playground/src/defines/Instructions.scala new file mode 100644 index 0000000..e86def8 --- /dev/null +++ b/chisel/playground/src/defines/Instructions.scala @@ -0,0 +1,227 @@ +package cpu.defines + +import chisel3._ 
+import chisel3.util.BitPat + +trait Instructions { + // @formatter:off + // | | | | | | | + def ADD = BitPat("b000000???????????????00000100000") + def ADDI = BitPat("b001000??????????????????????????") + def ADDIU = BitPat("b001001??????????????????????????") + def ADDU = BitPat("b000000???????????????00000100001") + def AND = BitPat("b000000???????????????00000100100") + def ANDI = BitPat("b001100??????????????????????????") + def BEQ = BitPat("b000100??????????????????????????") + def BGEZ = BitPat("b000001?????00001????????????????") + def BGEZAL = BitPat("b000001?????10001????????????????") + def BGTZ = BitPat("b000111?????00000????????????????") + def BLEZ = BitPat("b000110?????00000????????????????") + def BLTZ = BitPat("b000001?????00000????????????????") + def BLTZAL = BitPat("b000001?????10000????????????????") + def BNE = BitPat("b000101??????????????????????????") + def BNEL = BitPat("b010101??????????????????????????") + def BLTZL = BitPat("b000001?????00010????????????????") + def BLTZALL = BitPat("b000001?????10010????????????????") + def BLEZL = BitPat("b010110?????00000????????????????") + def BGTZL = BitPat("b010111?????00000????????????????") + def BGEZL = BitPat("b000001?????00011????????????????") + def BGEZALL = BitPat("b000001?????10011????????????????") + def BEQL = BitPat("b010100??????????????????????????") + def BREAK = BitPat("b000000????????????????????001101") + def CACHE = BitPat("b101111??????????????????????????") + def CLO = BitPat("b011100???????????????00000100001") // Count Leading Ones in Word + def CLZ = BitPat("b011100???????????????00000100000") // Count Leading Zeros in Word + def DIV = BitPat("b000000??????????0000000000011010") // Divide Word + def DIVU = BitPat("b000000??????????0000000000011011") // Divide Unsigned Word + def ERET = BitPat("b01000010000000000000000000011000") // Exception Return + def J = BitPat("b000010??????????????????????????") // Jump + def JAL = BitPat("b000011??????????????????????????") // Jump 
and Link + def JALR = BitPat("b000000?????00000??????????001001") // Jump and Link Register + def JR = BitPat("b000000?????0000000000?????001000") // Jump Register + def LB = BitPat("b100000??????????????????????????") // Load Byte + def LBU = BitPat("b100100??????????????????????????") // Load Byte Unsigned + def LH = BitPat("b100001??????????????????????????") // Load Halfword + def LHU = BitPat("b100101??????????????????????????") // Load Halfword Unsigned + def LL = BitPat("b110000??????????????????????????") // Load Linked Word + def LUI = BitPat("b00111100000?????????????????????") // Load Upper Immediate + def LW = BitPat("b100011??????????????????????????") // Load Word + def LWL = BitPat("b100010??????????????????????????") // Load Word Left + def LWR = BitPat("b100110??????????????????????????") // Load Word Right + def MADD = BitPat("b011100??????????0000000000000000") // Multiply and Add Word to Hi, Lo + def MADDU = BitPat("b011100??????????0000000000000001") // Multiply and Add Unsigned Word to Hi, Lo + def MFC0 = BitPat("b01000000000??????????00000000???") // Move from Coprocessor 0 + def MFHI = BitPat("b0000000000000000?????00000010000") // Move From HI Register + def MFLO = BitPat("b0000000000000000?????00000010010") // Move From LO Register + def MOVN = BitPat("b000000???????????????00000001011") // Move Conditional on Not Zero + def MOVZ = BitPat("b000000???????????????00000001010") // Move Conditional on Zero + def MSUB = BitPat("b011100??????????0000000000000100") // Multiply and Subtract Word to Hi, Lo + def MSUBU = BitPat("b011100??????????0000000000000101") // Multiply and Subtract Unsigned Word to Hi, Lo + def MTC0 = BitPat("b01000000100??????????00000000???") // Move to Coprocessor 0 + def MTHI = BitPat("b000000?????000000000000000010001") // Move to HI Register + def MTLO = BitPat("b000000?????000000000000000010011") // Move to LO Register + def MUL = BitPat("b011100???????????????00000000010") // Multiply Word to GPR + def MULT = 
BitPat("b000000??????????0000000000011000") // Multiply Word + def MULTU = BitPat("b000000??????????0000000000011001") // Multiply Unsigned Word + def NOP = BitPat("b00000000000000000000000000000000") // No Operation + def NOR = BitPat("b000000???????????????00000100111") // Not Or + def OR = BitPat("b000000???????????????00000100101") // Or + def ORI = BitPat("b001101??????????????????????????") // Or Immediate + def PREFX = BitPat("b010011???????????????00000001111") // Prefetch Indexed + def PREF = BitPat("b110011??????????????????????????") // Prefetch + def SB = BitPat("b101000??????????????????????????") // Store Byte + def SC = BitPat("b111000??????????????????????????") // Store Conditional Word + def SH = BitPat("b101001??????????????????????????") // Store Halfword + def SLL = BitPat("b00000000000???????????????000000") // Shift Word Left Logical + def SLLV = BitPat("b000000???????????????00000000100") // Shift Word Left Logical Variable + def SLT = BitPat("b000000???????????????00000101010") // Set on Less Than + def SLTI = BitPat("b001010??????????????????????????") // Set on Less Than Immediate + def SLTIU = BitPat("b001011??????????????????????????") // Set on less Than Immediate Unsigned + def SLTU = BitPat("b000000???????????????00000101011") // Set on less Than Unsigned + def SRA = BitPat("b00000000000???????????????000011") // Shift Word Right Arithmetic + def SRAV = BitPat("b000000???????????????00000000111") // Shift Word Right Arithmetic Variable + def SRL = BitPat("b00000000000???????????????000010") // Shift Word Right Logical + def SRLV = BitPat("b000000???????????????00000000110") // Shift Word Right Logical Variable + def SUB = BitPat("b000000???????????????00000100010") // Subtract Word + def SUBU = BitPat("b000000???????????????00000100011") // Subtract Unsigned Word + def SW = BitPat("b101011??????????????????????????") // Store Word + def SWL = BitPat("b101010??????????????????????????") // Store Word Left + def SWR = 
BitPat("b101110??????????????????????????") // Store Word Right + def SYNC = BitPat("b000000000000000000000?????001111") // To order loads and stores for shared memory + def SYSCALL = BitPat("b000000????????????????????001100") // System Call + def TEQ = BitPat("b000000????????????????????110100") // Trap if Equal + def TEQI = BitPat("b000001?????01100????????????????") // Trap if Equal Immediate + def TGE = BitPat("b000000????????????????????110000") // Trap if Greater or Equal + def TGEI = BitPat("b000001?????01000????????????????") // Trap if Greater or Equal Immediate + def TGEIU = BitPat("b000001?????01001????????????????") // Trap if Greater or Equal Immediate Unsigned + def TGEU = BitPat("b000000????????????????????110001") // Trap if Greater or Equal Unsigned + def TLBP = BitPat("b01000010000000000000000000001000") // Probe TLB for Matching Entry + def TLBR = BitPat("b01000010000000000000000000000001") // Read Indexed TLB Entry + def TLBWI = BitPat("b01000010000000000000000000000010") // Write Indexed TLB Entry + def TLBWR = BitPat("b01000010000000000000000000000110") // Write Random TLB Entry + def TLT = BitPat("b000000????????????????????110010") // Trap if Less Than + def TLTI = BitPat("b000001?????01010????????????????") // Trap if Less Than Immediate + def TLTIU = BitPat("b000001?????01011????????????????") // Trap if Less Than Immediate Unsigned + def TLTU = BitPat("b000000????????????????????110011") // Trap if less Than Unsigned + def TNE = BitPat("b000000????????????????????110110") // Trap if Not Equal + def TNEI = BitPat("b000001?????01110????????????????") // Trap if Not Equal Immediate + def WAIT = BitPat("b0100001???????????????????100000") // Enter Standby Mode + def XOR = BitPat("b000000???????????????00000100110") // Exclusive OR + def XORI = BitPat("b001110??????????????????????????") // Exclusive OR Immediate + // @formatter:on + + // BitPat can't use VecInit and contains + def isBranchInst(inst: UInt) = { + require(inst.getWidth == 32) + 
val bi = + Seq(J, JAL, JR, JALR, BEQ, BNE, BGTZ, BLEZ, BGEZ, BGEZAL, BLTZ, BLTZAL) + bi.foldLeft(false.B)((r, e) => r || (e === inst)) + } + def isJBranchInst(inst: UInt) = { + require(inst.getWidth == 32) + val jbi = Seq(J, JAL) + jbi.foldLeft(false.B)((r, e) => r || (e === inst)) + } +} + +trait unImpl { + // @formatter:off + // 未实现 + def WSBH = BitPat("b01111100000??????????00010100000") // Word Swap Bytes Within Halfwords + def WRPGPR = BitPat("b01000001110??????????00000000000") // Write to GPR in Previous Shadow Set + def TRUNC_W_fmt = BitPat("b010001?????00000??????????001101") // Floating Point Truncate to Word Fixed Point + def TRUNC_L_fmt = BitPat("b010001?????00000??????????001001") // Floating Point Truncate to Long Fixed Point + def SYNCI = BitPat("b000001?????11111????????????????") // Synchronize Caches to Make Instruction Writes Effective + def SWXC1 = BitPat("b010011???????????????00000001000") // Store Word Indexed from Floating Point + def SWC2 = BitPat("b111010??????????????????????????") // Store Word from Coprocessor 2 + def SWC1 = BitPat("b111001??????????????????????????") // Store Word from Floating Point + def SUXC1 = BitPat("b010011???????????????00000001101") // Store Doubleword Indexed Unaligned from Floating Point + def SUB_fmt = BitPat("b010001????????????????????000001") // Floating Point Subtract + def SSNOP = BitPat("b00000000000000000000000001000000") // Superscalar No Operation + def SQRT_fmt = BitPat("b010001?????00000??????????000100") // Floating Point Square Root + def SEH = BitPat("b01111100000??????????11000100000") // Sign-Extend Halfword + def SEB = BitPat("b01111100000??????????10000100000") // Sign-Extend Byte + def SDXC1 = BitPat("b010011???????????????00000001001") // Store Doubleword Indexed from Floating Point + def SDC2 = BitPat("b111110??????????????????????????") // Store Doubleword from Coprocessor 2 + def SDC1 = BitPat("b111101??????????????????????????") // Store Doubleword from Floating Point + def SDBBP = 
BitPat("b011100????????????????????111111") // Software Debug Breakpoint + def RSQRT_fmt = BitPat("b010001?????00000??????????010110") // Reciprocal Square Root Approximation + def ROUND_W_fmt = BitPat("b010001?????00000??????????001100") // Floating Point Round to Word Fixed Point + def ROUND_L_fmt = BitPat("b010001?????00000??????????001000") // Floating Point Round to Long Fixed Point + def ROTRV = BitPat("b000000???????????????00001000110") // Rotate Word Right Variable + def ROTR = BitPat("b00000000001???????????????000010") // Rotate Word Right + def RECIP_fmt = BitPat("b010001?????00000??????????010101") // Reciprocal Approximation + def RDPGPR = BitPat("b01000001010??????????00000000000") // Read GPR from Previous Shadow Set + def RDHWR = BitPat("b01111100000??????????00000111011") // Read Hardware Register + def PUU_PS = BitPat("b01000110110???????????????101111") // Pair Upper Upper + def PUL_PS = BitPat("b01000110110???????????????101110") // Pair Upper Lower + def PLU_PS = BitPat("b01000110110???????????????101101") // Pair Lower Upper + def PLL_PS = BitPat("b01000110110???????????????101100") // Pair Lower Lower + def PAUSE = BitPat("b00000000000000000000000101000000") // Wait for the LLBit to clear + def NMSUB_fmt = BitPat("b010011????????????????????111???") // Floating Point Negative Multiply Subtract + def NMADD_fmt = BitPat("b010011????????????????????110???") // Floating Point Negative Multiply Add + def NEG_fmt = BitPat("b010001?????00000??????????000111") // Floating Point Negate + def MUL_fmt = BitPat("b010001????????????????????000010") // Floating Point Multiply + def MTHC2 = BitPat("b01001000111?????????????????????") // Move Word to High of Coprocessor 2 Register + def MTHC1 = BitPat("b01000100111??????????00000000000") // Move Word to High Half of Floating Point Register + def MTC2 = BitPat("b01001000100?????????????????????") // Move Word to Coprocessor 2 + def MTC1 = BitPat("b01000100100??????????00000000000") // Move Word to Floating 
point + def MSUB_fmt = BitPat("b010011????????????????????101???") // Floating Point Multiply Subtract + def MOVZ_fmt = BitPat("b010001????????????????????010010") // Floating Point Move Conditional on Zero + def MOVT_fmt = BitPat("b010001????????01??????????010001") // Floating Point Move Conditional on Floating Point True + def MOVT = BitPat("b000000????????01?????00000000001") // Move Conditional on Floating Point True + def MOVN_fmt = BitPat("b010001????????????????????010011") // Floating Point Move Conditional on Not Zero + def MOVF_fmt = BitPat("b010001????????00??????????010001") // Floating Point Move Conditional on Floating Point False + def MOVF = BitPat("b000000????????00?????00000000001") // Move Conditional on Floating Point False + def MOV_fmt = BitPat("b010001?????00000??????????000110") // Floating Point Move + def MFHC2 = BitPat("b01001000011?????????????????????") // Move Word From High Half of Coprocessor 2 Register + def MFHC1 = BitPat("b01000100011??????????00000000000") // Move Word From High Half of Floating Point Register + def MFC2 = BitPat("b01001000000?????????????????????") // Move Word From Coprocessor 2 + def MFC1 = BitPat("b01000100000??????????00000000000") // Move Word from Floating Point + def MADD_fmt = BitPat("b010011????????????????????100???") // Floating Point Multiply Add + def LWXC1 = BitPat("b010011??????????00000?????000000") // Load Word Indexed to Floating Point + def LWC2 = BitPat("b110010??????????????????????????") // Load Word to Coprocessor 2 + def LWC1 = BitPat("b110001??????????????????????????") // Load Word to Floating Point + def LUXC1 = BitPat("b010011??????????00000?????000101") // Load Doubleword Indexed Unaligned to Floating Point + def LDXC1 = BitPat("b010011??????????00000?????000001") // Load Doubleword Indexed to Floating Point + def LDC2 = BitPat("b110110??????????????????????????") // Load Doubleword to Coprocessor 2 + def LDC1 = BitPat("b110101??????????????????????????") // Load Doubleword to 
Floating Point + def JR_HB = BitPat("b000000?????00000000001????001000") // Jump Register with Hazard Barrier + def JALX = BitPat("b011101??????????????????????????") // Jump and Link Exchange + def JALR_HB = BitPat("b000000?????00000?????1????001001") // Jump and Link Register with Hazard Barrier + def INS = BitPat("b011111????????????????????000100") // Insert Bit Field + def FLOOR_W_fmt = BitPat("b010001?????00000??????????001111") // Floating Point Floor Convert to Word Fixed Point + def FLOOR_L_fmt = BitPat("b010001?????00000??????????001011") // Floating Point Floor Convert to Long Fixed Point + def EXT = BitPat("b011111????????????????????000000") // Extract Bit Field + def EI = BitPat("b01000001011?????0110000000100000") // Enable Interrupts + def EHB = BitPat("b00000000000000000000000011000000") // Execution hazard Barrier + def DIV_fmt = BitPat("b010001????????????????????000011") // Floating Point Divide + def DI = BitPat("b01000001011?????0110000000000000") // Disable Interrupts + def DERET = BitPat("b01000010000000000000000000011111") // Debug Exception Return + def CVT_W_fmt = BitPat("b010001?????00000??????????100100") // Floating Point Convert to Word Fixed Point + def CVT_S_PU = BitPat("b0100011011000000??????????100000") // Floating Point Convert Pair Upper to Single Floating Point + def CVT_S_PL = BitPat("b0100011011000000??????????101000") // Floating Point Convert Pair Lower to Single Floating Point + def CVT_S_fmt = BitPat("b010001?????00000??????????100000") // Floating Point Convert to Single Floating Point + def CVT_PS_S = BitPat("b01000110000???????????????100110") // Floating Point Convert Pair to Paired Single + def CVT_L_fmt = BitPat("b010001?????00000??????????100101") // Floating Point Convert to Long Fixed Point + def CVT_D_fmt = BitPat("b010001?????00000??????????100001") // Floating Point Convert to Double Floating Point + def CTC2 = BitPat("b01001000110?????????????????????") // Move Control Word to Coprocessor 2 + def CTC1 = 
BitPat("b01000100110??????????00000000000") // Move Control Word to Floating Point + def COP2 = BitPat("b0100101?????????????????????????") // Coprocessor operation to Coprocessor 2 + def CFC2 = BitPat("b01001000010?????????????????????") // Move Control Word From Coprocessor 2 + def CFC1 = BitPat("b01000100010??????????00000000000") // Move Control Word From Floating Point + def CEIL_W_fmt = BitPat("b010001?????00000??????????001110") // Floating Point Ceiling Convert to Word Fixed Point + def CEIL_L_fmt = BitPat("b010001?????00000??????????001010") // Fixed Point Ceiling Convert to Long Fixed Point + def C_cond_fmt = BitPat("b010001??????????????????0011????") // Floating Point Compare + def BC2TL = BitPat("b01001001000???11????????????????") + def BC2T = BitPat("b01001001000???01????????????????") + def BC2FL = BitPat("b01001001000???10????????????????") + def BC2F = BitPat("b01001001000???00????????????????") + def BC1TL = BitPat("b01000101000???11????????????????") + def BC1T = BitPat("b01000101000???01????????????????") + def BC1FL = BitPat("b01000101000???10????????????????") + def BC1F = BitPat("b01000101000???00????????????????") + def ALNV_PS = BitPat("b010011????????????????????011110") + def ABS_fmt = BitPat("b010001?????00000??????????000101") + // @formatter:on +} diff --git a/chisel/playground/src/defines/StaticBundles.scala b/chisel/playground/src/defines/StaticBundles.scala new file mode 100644 index 0000000..1e82afe --- /dev/null +++ b/chisel/playground/src/defines/StaticBundles.scala @@ -0,0 +1,44 @@ +package cpu.defines + +import chisel3._ +import chisel3.util._ +import cpu.defines.Const._ +import cpu.CpuConfig + +class SocStatistic extends Bundle { + val cp0_count = Output(UInt(32.W)) + val cp0_random = Output(UInt(32.W)) + val cp0_cause = Output(UInt(32.W)) + val int = Output(Bool()) + val commit = Output(Bool()) +} + +class BranchPredictorUnitStatistic extends Bundle { + val branch = Output(UInt(32.W)) + val success = Output(UInt(32.W)) +} + 
+class CPUStatistic extends Bundle { + val soc = new SocStatistic() + val bpu = new BranchPredictorUnitStatistic() +} + +class GlobalStatistic extends Bundle { + val cpu = new CPUStatistic() + val cache = new CacheStatistic() +} + +class ICacheStatistic extends Bundle { + val request = Output(UInt(32.W)) + val hit = Output(UInt(32.W)) +} + +class DCacheStatistic extends Bundle { + val request = Output(UInt(32.W)) + val hit = Output(UInt(32.W)) +} + +class CacheStatistic extends Bundle { + val icache = new ICacheStatistic() + val dcache = new DCacheStatistic() +} diff --git a/chisel/playground/src/defines/Util.scala b/chisel/playground/src/defines/Util.scala new file mode 100644 index 0000000..8313597 --- /dev/null +++ b/chisel/playground/src/defines/Util.scala @@ -0,0 +1,56 @@ +package cpu.defines + +import chisel3._ +import chisel3.util._ + +object Util { + def subwordModify(source: UInt, start: Int, md: UInt): UInt = { + val ms = md.getWidth + subwordModify(source, (start, start - ms + 1), md) + } + + def subwordModify(source: UInt, tuple: (Int, Int), md: UInt): UInt = { + val ws = source.getWidth + val ms = md.getWidth + val start = tuple._1 + val end = tuple._2 + require( + ws > start && start >= end && end >= 0, + s"ws: $ws, start: $start, end: $end" + ) + require(start - end == ms - 1) + if (end == 0) Cat(source(ws - 1, start + 1), md) + else if (start == ws - 1) Cat(md, source(end - 1, 0)) + else Cat(source(ws - 1, start + 1), md, source(end - 1, 0)) + } + + def listHasElement(list: Seq[UInt], element: UInt): Bool = { + list.foldLeft(false.B)((r, e) => r || (e === element)) + } + + def MAXnBIT(m: Int): BigInt = BigInt(1) << m + + def unsignedToSigned(s: BigInt, width: Int = 32): BigInt = { + val m = MAXnBIT(width - 1) + if (s >= m) s - 2 * m + else s + } + + def signedExtend(raw: UInt, to: Int = 32): UInt = { + signedExtend(raw, raw.getWidth, to) + } + + def signedExtend(raw: UInt, from: Int, to: Int): UInt = { + require(to > from && from >= 1) + Cat(Fill(to 
- from, raw(from - 1)), raw) + } + + def zeroExtend(raw: UInt, to: Int = 32): UInt = { + zeroExtend(raw, raw.getWidth, to) + } + + def zeroExtend(raw: UInt, from: Int, to: Int): UInt = { + require(to > from && from >= 1) + Cat(Fill(to - from, 0.U), raw) + } +} diff --git a/chisel/playground/src/mmu/TlbL1D.scala b/chisel/playground/src/mmu/TlbL1D.scala new file mode 100644 index 0000000..b633733 --- /dev/null +++ b/chisel/playground/src/mmu/TlbL1D.scala @@ -0,0 +1,70 @@ +package cpu.mmu + +import chisel3._ +import chisel3.util._ +import cpu.defines._ + +class DTLB extends ITLB { + val dirty = Bool() +} + +class TlbL1D extends Module { + val io = IO(new Bundle { + val cache = new Tlb_DCache() + val fence = Input(Bool()) + val cpu_stall = Input(Bool()) + val dcache_stall = Input(Bool()) + val addr = Input(UInt(32.W)) + + val mem_en = Input(Bool()) + val mem_write = Input(Bool()) + + val tlb1 = Output(new Tlb1InfoD()) + val tlb2 = Flipped(new Tlb2Info()) + }) + val dtlb = RegInit(0.U.asTypeOf(new DTLB())) + val vpn = io.addr(31, 12) + val direct_mapped = io.addr(31, 30) === 2.U(2.W) + + io.cache.uncached := Mux(direct_mapped, io.addr(29), dtlb.uncached) + io.cache.translation_ok := direct_mapped || (dtlb.vpn === vpn && dtlb.valid && (!io.mem_write || dtlb.dirty)) + + io.cache.tag := Mux(direct_mapped, Cat(0.U(3.W), io.addr(28, 12)), dtlb.ppn) + io.cache.pa := Cat(io.cache.tag, io.addr(11, 0)) + io.cache.tlb1_ok := dtlb.vpn === vpn && dtlb.valid + io.cache.hit := io.cache.fill && io.tlb2.found && io.tlb2.entry.v(vpn(0)) + + when(io.fence) { dtlb.valid := false.B } + + val tlb1 = RegInit(0.U.asTypeOf(new Tlb1InfoD())) + io.tlb1 <> tlb1 + + val tlb2 = RegInit(0.U.asTypeOf(new Bundle { val vpn2 = UInt(19.W) })) + io.tlb2.vpn2 <> tlb2.vpn2 + + when(io.cache.dcache_is_idle && !io.cache.fill && io.mem_en && !io.cache.translation_ok) { + when(io.cache.tlb1_ok) { + tlb1.modify := true.B + }.otherwise { + tlb2.vpn2 := vpn(19, 1) + } + }.elsewhen(io.cache.fill) { + 
when(io.tlb2.found) { + when(io.tlb2.entry.v(vpn(0))) { + dtlb.vpn := vpn + dtlb.ppn := io.tlb2.entry.pfn(vpn(0)) + dtlb.uncached := !io.tlb2.entry.c(vpn(0)) + dtlb.dirty := io.tlb2.entry.d(vpn(0)) + dtlb.valid := true.B + }.otherwise { + tlb1.invalid := true.B + } + }.otherwise { + tlb1.refill := true.B + } + }.elsewhen(io.cache.dcache_is_save && !io.cpu_stall && !io.dcache_stall) { + tlb1.invalid := false.B + tlb1.refill := false.B + tlb1.modify := false.B + } +} diff --git a/chisel/playground/src/mmu/TlbL1I.scala b/chisel/playground/src/mmu/TlbL1I.scala new file mode 100644 index 0000000..3c27cb8 --- /dev/null +++ b/chisel/playground/src/mmu/TlbL1I.scala @@ -0,0 +1,59 @@ +package cpu.mmu + +import chisel3._ +import chisel3.util._ +import cpu.defines._ + +class ITLB extends Bundle { + val vpn = UInt(20.W) + val ppn = UInt(20.W) + val uncached = Bool() + val valid = Bool() +} + +class TlbL1I extends Module { + val io = IO(new Bundle { + val addr = Input(UInt(32.W)) + val fence = Input(Bool()) + val cpu_stall = Input(Bool()) + val icache_stall = Input(Bool()) + val cache = new Tlb_ICache() + val tlb1 = Output(new Tlb1InfoI()) + val tlb2 = Flipped(new Tlb2Info()) + }) + val itlb = RegInit(0.U.asTypeOf(new ITLB())) + val vpn = io.addr(31, 12) + val direct_mapped = io.addr(31, 30) === 2.U(2.W) + + io.cache.uncached := Mux(direct_mapped, io.addr(29), itlb.uncached) + io.cache.translation_ok := direct_mapped || (itlb.vpn === vpn && itlb.valid) + io.cache.hit := io.tlb2.found && io.tlb2.entry.v(vpn(0)) + io.cache.tag := Mux(direct_mapped, Cat(0.U(3.W), io.addr(28, 12)), itlb.ppn) + io.cache.pa := Cat(io.cache.tag, io.addr(11, 0)) + + when(io.fence && !io.icache_stall && !io.cpu_stall) { itlb.valid := false.B } + + // * tlb1 * // + val tlb1 = RegInit(0.U.asTypeOf(new Tlb1InfoI())) + tlb1 <> io.tlb1 + + io.tlb2.vpn2 := vpn(19, 1) + + when(io.cache.fill) { + when(io.tlb2.found) { + when(io.tlb2.entry.v(vpn(0))) { + itlb.vpn := vpn + itlb.ppn := io.tlb2.entry.pfn(vpn(0)) + 
itlb.uncached := !io.tlb2.entry.c(vpn(0)) + itlb.valid := true.B + }.otherwise { + tlb1.invalid := true.B + } + }.otherwise { + tlb1.refill := true.B + } + }.elsewhen(io.cache.icache_is_save && !io.cpu_stall && !io.icache_stall) { + tlb1.invalid := false.B + tlb1.refill := false.B + } +} diff --git a/chisel/playground/src/mmu/TlbL2.scala b/chisel/playground/src/mmu/TlbL2.scala new file mode 100644 index 0000000..e83225c --- /dev/null +++ b/chisel/playground/src/mmu/TlbL2.scala @@ -0,0 +1,69 @@ +package cpu.pipeline.execute + +import chisel3._ +import chisel3.util._ +import cpu.defines._ +import cpu.defines.TlbEntry +import cpu.defines.Const._ + +class TlbL2 extends Module { + val io = IO(new Bundle { + val in = Input(new Bundle { + val write = new Bundle { + val en = Bool() + val index = UInt(log2Ceil(TLB_NUM).W) + val entry = new TlbEntry() + } + val read = new Bundle { + val index = UInt(log2Ceil(TLB_NUM).W) + } + val entry_hi = new Bundle { + val vpn2 = UInt(VPN2_WID.W) + val asid = UInt(ASID_WID.W) + } + val tlb1_vpn2 = UInt(VPN2_WID.W) + val tlb2_vpn2 = UInt(VPN2_WID.W) + }) + val out = Output(new Bundle { + val read = new Bundle { + val entry = new TlbEntry() + } + val tlb1_found = Bool() + val tlb2_found = Bool() + val tlb1_entry = new TlbEntry() + val tlb2_entry = new TlbEntry() + val tlb_found = Bool() + val tlb_match_index = UInt(log2Ceil(TLB_NUM).W) + }) + }) + // tlb l2 + val tlb_l2 = RegInit(VecInit(Seq.fill(TLB_NUM)(0.U.asTypeOf(new TlbEntry())))) + + val tlb_match = Seq.fill(3)(Wire(Vec(TLB_NUM, Bool()))) + val tlb_find_vpn2 = Wire(Vec(3, UInt(VPN2_WID.W))) + val tlb_match_index = Wire(Vec(3, UInt(log2Ceil(TLB_NUM).W))) + + tlb_find_vpn2(0) := io.in.entry_hi.vpn2 + tlb_find_vpn2(1) := io.in.tlb1_vpn2 + tlb_find_vpn2(2) := io.in.tlb2_vpn2 + + io.out.tlb1_found := tlb_match(1).asUInt.orR + io.out.tlb2_found := tlb_match(2).asUInt.orR + io.out.tlb1_entry := tlb_l2(tlb_match_index(1)) + io.out.tlb2_entry := tlb_l2(tlb_match_index(2)) + io.out.tlb_found 
:= tlb_match(0).asUInt.orR + io.out.tlb_match_index := tlb_match_index(0) + io.out.read.entry := tlb_l2(io.in.read.index) + + for (i <- 0 until (3)) { + for (j <- 0 until (TLB_NUM)) { + tlb_match(i)(j) := (tlb_l2(j).g || tlb_l2(j).asid === io.in.entry_hi.asid) && + (tlb_l2(j).vpn2 === tlb_find_vpn2(i)) + } + tlb_match_index(i) := PriorityEncoder(tlb_match(i)) + } + + when(io.in.write.en) { + tlb_l2(io.in.write.index) := io.in.write.entry + } +} diff --git a/chisel/playground/src/pipeline/decoder/ARegfile.scala b/chisel/playground/src/pipeline/decoder/ARegfile.scala new file mode 100644 index 0000000..62e3505 --- /dev/null +++ b/chisel/playground/src/pipeline/decoder/ARegfile.scala @@ -0,0 +1,72 @@ +package cpu.pipeline.decoder + +import chisel3._ +import chisel3.util._ +import cpu.defines._ +import cpu.defines.Const._ +import cpu.CpuConfig + +class SrcRead extends Bundle { + val raddr = Output(UInt(REG_ADDR_WID.W)) + val rdata = Input(UInt(DATA_WID.W)) +} + +class Src12Read extends Bundle { + val src1 = new SrcRead() + val src2 = new SrcRead() +} + +class RegWrite extends Bundle { + val wen = Output(Bool()) + val waddr = Output(UInt(REG_ADDR_WID.W)) + val wdata = Output(UInt(DATA_WID.W)) +} + +class ARegFile(implicit val config: CpuConfig) extends Module { + val io = IO(new Bundle { + val read = Flipped(Vec(config.decoderNum, new Src12Read())) + val write = Flipped(Vec(config.commitNum, new RegWrite())) + val bpu = if (config.branchPredictor == "pesudo") Some(Flipped(new Src12Read())) else None + }) + + // 定义32个32位寄存器 + val regs = RegInit(VecInit(Seq.fill(AREG_NUM)(0.U(DATA_WID.W)))) + + // 写寄存器堆 + for (i <- 0 until (config.commitNum)) { + when(io.write(i).wen && io.write(i).waddr =/= 0.U) { + regs(io.write(i).waddr) := io.write(i).wdata + } + } + + // 读寄存器堆 + for (i <- 0 until (config.decoderNum)) { + // src1 + when(io.read(i).src1.raddr === 0.U) { + io.read(i).src1.rdata := 0.U + }.otherwise { + io.read(i).src1.rdata := regs(io.read(i).src1.raddr) + for (j <- 0 
until (config.commitNum)) { + when(io.write(j).wen && io.read(i).src1.raddr === io.write(j).waddr) { + io.read(i).src1.rdata := io.write(j).wdata + } + } + } + // src2 + when(io.read(i).src2.raddr === 0.U) { + io.read(i).src2.rdata := 0.U + }.otherwise { + io.read(i).src2.rdata := regs(io.read(i).src2.raddr) + for (j <- 0 until (config.commitNum)) { + when(io.write(j).wen && io.read(i).src2.raddr === io.write(j).waddr) { + io.read(i).src2.rdata := io.write(j).wdata + } + } + } + } + + if (config.branchPredictor == "pesudo") { + io.bpu.get.src1.rdata := regs(io.bpu.get.src1.raddr) + io.bpu.get.src2.rdata := regs(io.bpu.get.src2.raddr) + } +} diff --git a/chisel/playground/src/pipeline/decoder/Decoder.scala b/chisel/playground/src/pipeline/decoder/Decoder.scala new file mode 100644 index 0000000..e9953ba --- /dev/null +++ b/chisel/playground/src/pipeline/decoder/Decoder.scala @@ -0,0 +1,194 @@ +package cpu.pipeline.decoder + +import chisel3._ +import chisel3.util._ +import cpu.defines._ +import cpu.defines.Const._ + +class Decoder extends Module { + val io = IO(new Bundle { + // inputs + val in = Input(new Bundle { + val inst = UInt(INST_WID.W) + }) + // outputs + val out = Output(new InstInfo()) + }) + val inst = io.in.inst + + val signals: List[UInt] = ListLookup( + //@formatter:off + inst, + List(INST_INVALID, READ_DISABLE, READ_DISABLE, FU_ALU, EXE_NOP, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + Array( /* inst_valid | reg1_ren | reg2_ren | fusel | op | reg_wen | reg_waddr | imm_type | dual_issue */ + // NOP + NOP -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_ALU, EXE_NOP, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + // 位操作 + OR -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_OR, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + AND -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_AND, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + XOR -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_XOR, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + NOR -> 
List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_NOR, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + // 移位 + SLLV -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_SLL, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + SRLV -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_SRL, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + SRAV -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_SRA, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + SLL -> List(INST_VALID, READ_DISABLE, READ_ENABLE, FU_ALU, EXE_SLL, WRITE_ENABLE, WRA_T1, IMM_SHT, DUAL_ISSUE), + SRL -> List(INST_VALID, READ_DISABLE, READ_ENABLE, FU_ALU, EXE_SRL, WRITE_ENABLE, WRA_T1, IMM_SHT, DUAL_ISSUE), + SRA -> List(INST_VALID, READ_DISABLE, READ_ENABLE, FU_ALU, EXE_SRA, WRITE_ENABLE, WRA_T1, IMM_SHT, DUAL_ISSUE), + // 立即数 + ORI -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_ALU, EXE_OR, WRITE_ENABLE, WRA_T2, IMM_LZE, DUAL_ISSUE), + ANDI -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_ALU, EXE_AND, WRITE_ENABLE, WRA_T2, IMM_LZE, DUAL_ISSUE), + XORI -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_ALU, EXE_XOR, WRITE_ENABLE, WRA_T2, IMM_LZE, DUAL_ISSUE), + LUI -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_ALU, EXE_OR, WRITE_ENABLE, WRA_T2, IMM_HZE, DUAL_ISSUE), + + // Move + MOVN -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_MOVN, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + MOVZ -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_MOVZ, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + + // HI,LO的Move指令 + MFHI -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_MFHILO, EXE_MFHI, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + MFLO -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_MFHILO, EXE_MFLO, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + MTHI -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_MTHILO, EXE_MTHI, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + MTLO -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_MTHILO, EXE_MTLO, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + + // C0的Move指令 + 
MFC0 -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_ALU, EXE_MFC0, WRITE_ENABLE, WRA_T2, IMM_N, DUAL_ISSUE), + MTC0 -> List(INST_VALID, READ_DISABLE, READ_ENABLE, FU_ALU, EXE_MTC0, WRITE_DISABLE, WRA_X, IMM_N, SINGLE_ISSUE), + + // 比较指令 + SLT -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_SLT, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + SLTU -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_SLTU, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + // 立即数 + SLTI -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_ALU, EXE_SLT, WRITE_ENABLE, WRA_T2, IMM_LSE, DUAL_ISSUE), + SLTIU -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_ALU, EXE_SLTU, WRITE_ENABLE, WRA_T2, IMM_LSE, DUAL_ISSUE), + + // Trap + TEQ -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_EX, EXE_TEQ, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + TEQI -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_EX, EXE_TEQ, WRITE_DISABLE, WRA_X, IMM_LSE, DUAL_ISSUE), + TGE -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_EX, EXE_TGE, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + TGEI -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_EX, EXE_TGE, WRITE_DISABLE, WRA_X, IMM_LSE, DUAL_ISSUE), + TGEIU -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_EX, EXE_TGEU, WRITE_DISABLE, WRA_X, IMM_LSE, DUAL_ISSUE), + TGEU -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_EX, EXE_TGEU, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + TLT -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_EX, EXE_TLT, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + TLTI -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_EX, EXE_TLT, WRITE_DISABLE, WRA_X, IMM_LSE, DUAL_ISSUE), + TLTU -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_EX, EXE_TLTU, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + TLTIU -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_EX, EXE_TLTU, WRITE_DISABLE, WRA_X, IMM_LSE, DUAL_ISSUE), + TNE -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_EX, EXE_TNE, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + TNEI -> List(INST_VALID, READ_ENABLE, 
READ_DISABLE, FU_EX, EXE_TNE, WRITE_DISABLE, WRA_X, IMM_LSE, DUAL_ISSUE), + + // 算术指令 + ADD -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_ADD, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + ADDU -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_ADDU, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + SUB -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_SUB, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + SUBU -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_ALU, EXE_SUBU, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + MUL -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MUL, EXE_MUL, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + MULT -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MUL, EXE_MULT, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + MULTU -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MUL, EXE_MULTU, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + MADD -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MUL, EXE_MADD, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + MADDU -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MUL, EXE_MADDU, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + MSUB -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MUL, EXE_MSUB, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + MSUBU -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MUL, EXE_MSUBU, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + DIV -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_DIV, EXE_DIV, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + DIVU -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_DIV, EXE_DIVU, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + CLO -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_ALU, EXE_CLO, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + CLZ -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_ALU, EXE_CLZ, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + // 立即数 + ADDI -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_ALU, EXE_ADD, WRITE_ENABLE, WRA_T2, IMM_LSE, DUAL_ISSUE), + ADDIU -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_ALU, EXE_ADDU, WRITE_ENABLE, WRA_T2, 
IMM_LSE, DUAL_ISSUE), + // 跳转指令 + J -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_BR, EXE_J, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + JAL -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_BR, EXE_JAL, WRITE_ENABLE, WRA_T3, IMM_N, DUAL_ISSUE), + JR -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_BR, EXE_JR, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + JALR -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_BR, EXE_JALR, WRITE_ENABLE, WRA_T1, IMM_N, DUAL_ISSUE), + BEQ -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_BR, EXE_BEQ, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + BNE -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_BR, EXE_BNE, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + BGTZ -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_BR, EXE_BGTZ, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + BLEZ -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_BR, EXE_BLEZ, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + BGEZ -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_BR, EXE_BGEZ, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + BGEZAL -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_BR, EXE_BGEZAL, WRITE_ENABLE, WRA_T3, IMM_N, DUAL_ISSUE), + BLTZ -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_BR, EXE_BLTZ, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + BLTZAL -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_BR, EXE_BLTZAL, WRITE_ENABLE, WRA_T3, IMM_N, DUAL_ISSUE), + + // TLB + TLBP -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_ALU, EXE_TLBP, WRITE_DISABLE, WRA_X, IMM_N, SINGLE_ISSUE), + TLBR -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_ALU, EXE_TLBR, WRITE_DISABLE, WRA_X, IMM_N, SINGLE_ISSUE), + TLBWI -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_ALU, EXE_TLBWI, WRITE_DISABLE, WRA_X, IMM_N, SINGLE_ISSUE), + TLBWR -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_ALU, EXE_TLBWR, WRITE_DISABLE, WRA_X, IMM_N, SINGLE_ISSUE), + + // 例外指令 + SYSCALL -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_EX, EXE_SYSCALL, WRITE_DISABLE, WRA_X, IMM_N, SINGLE_ISSUE), + 
BREAK -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_EX, EXE_BREAK, WRITE_DISABLE, WRA_X, IMM_N, SINGLE_ISSUE), + ERET -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_EX, EXE_ERET, WRITE_DISABLE, WRA_X, IMM_N, SINGLE_ISSUE), + WAIT -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_ALU, EXE_NOP, WRITE_DISABLE, WRA_X, IMM_N, SINGLE_ISSUE), + + // 访存指令 + LB -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_MEM, EXE_LB, WRITE_ENABLE, WRA_T2, IMM_N, DUAL_ISSUE), + LBU -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_MEM, EXE_LBU, WRITE_ENABLE, WRA_T2, IMM_N, DUAL_ISSUE), + LH -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_MEM, EXE_LH, WRITE_ENABLE, WRA_T2, IMM_N, DUAL_ISSUE), + LHU -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_MEM, EXE_LHU, WRITE_ENABLE, WRA_T2, IMM_N, DUAL_ISSUE), + LW -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_MEM, EXE_LW, WRITE_ENABLE, WRA_T2, IMM_N, DUAL_ISSUE), + SB -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MEM, EXE_SB, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + SH -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MEM, EXE_SH, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + SW -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MEM, EXE_SW, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + LWL -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MEM, EXE_LWL, WRITE_ENABLE, WRA_T2, IMM_N, DUAL_ISSUE), + LWR -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MEM, EXE_LWR, WRITE_ENABLE, WRA_T2, IMM_N, DUAL_ISSUE), + SWL -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MEM, EXE_SWL, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + SWR -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MEM, EXE_SWR, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + + LL -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_MEM, EXE_LL, WRITE_ENABLE, WRA_T2, IMM_N, DUAL_ISSUE), + SC -> List(INST_VALID, READ_ENABLE, READ_ENABLE, FU_MEM, EXE_SC, WRITE_ENABLE, WRA_T2, IMM_N, DUAL_ISSUE), + + SYNC -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_EX, EXE_NOP, WRITE_DISABLE, 
WRA_X, IMM_N, DUAL_ISSUE), + PREF -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_ALU, EXE_NOP, WRITE_ENABLE, WRA_X, IMM_N, DUAL_ISSUE), + PREFX -> List(INST_VALID, READ_DISABLE, READ_DISABLE, FU_ALU, EXE_NOP, WRITE_DISABLE, WRA_X, IMM_N, DUAL_ISSUE), + + // Cache + CACHE -> List(INST_VALID, READ_ENABLE, READ_DISABLE, FU_ALU, EXE_CACHE, WRITE_DISABLE, WRA_X, IMM_N, SINGLE_ISSUE), + ), + // @formatter:on + ) + val inst_valid :: reg1_ren :: reg2_ren :: fusel :: op :: reg_wen :: reg_waddr_type :: imm_type :: dual_issue :: Nil = + signals + + val rt = inst(20, 16) + val rd = inst(15, 11) + val sa = inst(10, 6) + val rs = inst(25, 21) + val imm16 = inst(15, 0) + + io.out.inst_valid := inst_valid + io.out.reg1_ren := reg1_ren + io.out.reg1_raddr := rs + io.out.reg2_ren := reg2_ren + io.out.reg2_raddr := rt + io.out.fusel := fusel + io.out.op := op + io.out.reg_wen := reg_wen + io.out.reg_waddr := MuxLookup(reg_waddr_type, AREG_31)( // 取"b11111", 即31号寄存器 + Seq( + WRA_T1 -> rd, // 取inst(15,11) + WRA_T2 -> rt // 取inst(20,16) + ) + ) + io.out.imm32 := MuxLookup(imm_type, Util.zeroExtend(sa))( // default IMM_SHT + Seq( + IMM_LSE -> Util.signedExtend(imm16), + IMM_LZE -> Util.zeroExtend(imm16), + IMM_HZE -> Cat(imm16, Fill(16, 0.U)) + ) + ) + io.out.cp0_addr := Cat(inst(15, 11), inst(2, 0)) + io.out.dual_issue := dual_issue + io.out.whilo := VecInit(FU_MUL, FU_DIV, FU_MTHILO).contains(fusel) && op =/= EXE_MUL // MUL不写HILO + io.out.inst := inst + io.out.wmem := fusel === FU_MEM && (!reg_wen.orR || op === EXE_SC) + io.out.rmem := fusel === FU_MEM && reg_wen.orR + io.out.mul := fusel === FU_MUL + io.out.div := fusel === FU_DIV + io.out.ifence := inst(16) === 0.U && op === EXE_CACHE + io.out.dfence := inst(16) === 1.U && op === EXE_CACHE + io.out.tlbfence := VecInit(EXE_MTC0, EXE_TLBWI, EXE_TLBWR).contains(op) + io.out.branch_link := VecInit(EXE_JAL, EXE_JALR, EXE_BGEZAL, EXE_BLTZAL).contains(op) + io.out.mem_addr := DontCare + io.out.mem_wreg := VecInit(EXE_LB, EXE_LBU, 
// Port bundle between the instruction FIFO and the decoder unit.
// Directions are written from the decoder unit's side.
class InstFifoDecoderUnit(implicit val config: CpuConfig) extends Bundle {
  // Per-slot flag driven by the decoder unit: slot 0 follows the pipeline
  // control's allow_to_go, slot 1 follows the issue logic's decision.
  val allow_to_go = Output(Vec(config.decoderNum, Bool()))
  // One buffered entry per decode slot.
  val inst        = Input(Vec(config.decoderNum, new BufferUnit()))
  val info = Input(new Bundle {
    val inst0_is_in_delayslot = Bool()
    // empty / almost_empty are combined by the issue logic to decide whether
    // at least two instructions are available for dual issue.
    val empty                 = Bool()
    val almost_empty          = Bool()
  })

  // High when slot 0 holds a jump (J/JAL/JR/JALR) or a branch reported by the BPU.
  val jump_branch_inst = Output(Bool())
}

// Forwarding information supplied to the decode stage by one function unit.
class DataForwardToDecoderUnit extends Bundle {
  // Register write currently in the execute stage.
  val exe      = new RegWrite()
  // When set, the execute-stage value is not forwarded (see ForwardCtrl).
  // NOTE(review): presumably marks a load whose data is not available yet - confirm.
  val mem_wreg = Bool()
  // Register write currently in the memory stage.
  val mem      = new RegWrite()
}

// CP0 state exported to the decode stage for privilege and interrupt checks.
class Cp0DecoderUnit extends Bundle {
  // kernel_mode || Status.CU0; when low, CP0 instructions raise EX_CPU.
  val access_allowed    = Bool()
  val kernel_mode       = Bool()
  // Status.IE && !Status.EXL && !Status.ERL.
  // The spelling "intterupt" is part of the interface and is used by other modules.
  val intterupt_allowed = Bool()
  // Cause.IP and Status.IM; an interrupt is pending when (cause_ip & status_im) is non-zero.
  val cause_ip          = UInt(8.W)
  val status_im         = UInt(8.W)
}
pred_branch = Input(Bool()) + val branch_target = Input(UInt(PC_WID.W)) + val update_pht_index = Input(UInt(bpuConfig.phtDepth.W)) + } + val executeStage = Output(new DecoderUnitExecuteUnit()) + val ctrl = new DecoderUnitCtrl() + }) + + val issue = Module(new Issue()).io + val decoder = Seq.fill(config.decoderNum)(Module(new Decoder())) + val jumpCtrl = Module(new JumpCtrl()).io + val forwardCtrl = Module(new ForwardCtrl()).io + + io.regfile(0).src1.raddr := decoder(0).io.out.reg1_raddr + io.regfile(0).src2.raddr := decoder(0).io.out.reg2_raddr + io.regfile(1).src1.raddr := decoder(1).io.out.reg1_raddr + io.regfile(1).src2.raddr := decoder(1).io.out.reg2_raddr + + forwardCtrl.in.forward := io.forward + forwardCtrl.in.regfile := io.regfile // TODO:这里的连接可能有问题 + + issue.allow_to_go := io.ctrl.allow_to_go + issue.instFifo := io.instFifo.info + + jumpCtrl.in.allow_to_go := io.ctrl.allow_to_go + jumpCtrl.in.decoded_inst0 := decoder(0).io.out + jumpCtrl.in.forward := io.forward + jumpCtrl.in.pc := io.instFifo.inst(0).pc + jumpCtrl.in.reg1_data := io.regfile(0).src1.rdata + + val jump_branch_inst0 = jumpCtrl.out.jump_inst || io.bpu.branch_inst + val inst0_branch = jumpCtrl.out.jump || io.bpu.pred_branch + + io.fetchUnit.branch := inst0_branch + io.fetchUnit.target := Mux(io.bpu.pred_branch, io.bpu.branch_target, jumpCtrl.out.jump_target) + + io.instFifo.allow_to_go(0) := io.ctrl.allow_to_go + io.instFifo.allow_to_go(1) := issue.inst1.allow_to_go + io.instFifo.jump_branch_inst := jump_branch_inst0 + + io.bpu.id_allow_to_go := io.ctrl.allow_to_go + io.bpu.pc := io.instFifo.inst(0).pc + io.bpu.decoded_inst0 := decoder(0).io.out + io.bpu.pht_index := io.instFifo.inst(0).pht_index + + io.ctrl.inst0.src1.ren := decoder(0).io.out.reg1_ren + io.ctrl.inst0.src1.raddr := decoder(0).io.out.reg1_raddr + io.ctrl.inst0.src2.ren := decoder(0).io.out.reg2_ren + io.ctrl.inst0.src2.raddr := decoder(0).io.out.reg2_raddr + io.ctrl.branch := inst0_branch + + val pc = io.instFifo.inst.map(_.pc) 
+ val inst = io.instFifo.inst.map(_.inst) + val inst_info = decoder.map(_.io.out) + val tlb_refill = io.instFifo.inst.map(_.tlb.refill) + val tlb_invalid = io.instFifo.inst.map(_.tlb.invalid) + val interrupt = io.cp0.intterupt_allowed && (io.cp0.cause_ip & io.cp0.status_im).orR && !io.instFifo.info.empty + + for (i <- 0 until (config.decoderNum)) { + decoder(i).io.in.inst := inst(i) + issue.decodeInst(i) := inst_info(i) + issue.execute(i).mem_wreg := io.forward(i).mem_wreg + issue.execute(i).reg_waddr := io.forward(i).exe.waddr + } + + io.executeStage.inst0.pc := pc(0) + io.executeStage.inst0.inst_info := inst_info(0) + io.executeStage.inst0.inst_info.reg_wen := MuxLookup(inst_info(0).op, inst_info(0).reg_wen)( + Seq( + EXE_MOVN -> (io.executeStage.inst0.src_info.src2_data =/= 0.U), + EXE_MOVZ -> (io.executeStage.inst0.src_info.src2_data === 0.U) + ) + ) + io.executeStage.inst0.inst_info.mem_addr := + io.executeStage.inst0.src_info.src1_data + Util.signedExtend(io.executeStage.inst0.inst_info.inst(15, 0)) + io.executeStage.inst0.src_info.src1_data := Mux( + inst_info(0).reg1_ren, + forwardCtrl.out.inst(0).src1.rdata, + decoder(0).io.out.imm32 + ) + io.executeStage.inst0.src_info.src2_data := Mux( + inst_info(0).reg2_ren, + forwardCtrl.out.inst(0).src2.rdata, + decoder(0).io.out.imm32 + ) + io.executeStage.inst0.ex.flush_req := + io.executeStage.inst0.ex.excode =/= EX_NO || + io.executeStage.inst0.ex.tlb_refill || + io.executeStage.inst0.ex.eret + io.executeStage.inst0.ex.tlb_refill := tlb_refill(0) + io.executeStage.inst0.ex.eret := inst_info(0).op === EXE_ERET + io.executeStage.inst0.ex.badvaddr := pc(0) + io.executeStage.inst0.ex.bd := io.instFifo.info.inst0_is_in_delayslot + val inst0_ex_cpu = + !io.cp0.access_allowed && VecInit(EXE_MFC0, EXE_MTC0, EXE_TLBR, EXE_TLBWI, EXE_TLBWR, EXE_TLBP, EXE_ERET, EXE_WAIT) + .contains(inst_info(0).op) + io.executeStage.inst0.ex.excode := MuxCase( + EX_NO, + Seq( + interrupt -> EX_INT, + (tlb_refill(0) || tlb_invalid(0)) -> 
EX_TLBL, + (pc(0)(1, 0).orR || (pc(0)(31) && !io.cp0.kernel_mode)) -> EX_ADEL, + (inst_info(0).inst_valid === INST_INVALID) -> EX_RI, + (inst_info(0).op === EXE_SYSCALL) -> EX_SYS, + (inst_info(0).op === EXE_BREAK) -> EX_BP, + (inst0_ex_cpu) -> EX_CPU + ) + ) + io.executeStage.inst0.jb_info.jump_regiser := jumpCtrl.out.jump_register + io.executeStage.inst0.jb_info.branch_inst := io.bpu.branch_inst + io.executeStage.inst0.jb_info.pred_branch := io.bpu.pred_branch + io.executeStage.inst0.jb_info.branch_target := io.bpu.branch_target + io.executeStage.inst0.jb_info.update_pht_index := io.bpu.update_pht_index + + io.executeStage.inst1.allow_to_go := issue.inst1.allow_to_go + io.executeStage.inst1.pc := pc(1) + io.executeStage.inst1.inst_info := inst_info(1) + io.executeStage.inst1.inst_info.reg_wen := MuxLookup(inst_info(1).op, inst_info(1).reg_wen)( + Seq( + EXE_MOVN -> (io.executeStage.inst1.src_info.src2_data =/= 0.U), + EXE_MOVZ -> (io.executeStage.inst1.src_info.src2_data === 0.U) + ) + ) + io.executeStage.inst1.inst_info.mem_addr := + io.executeStage.inst1.src_info.src1_data + Util.signedExtend(io.executeStage.inst1.inst_info.inst(15, 0)) + io.executeStage.inst1.src_info.src1_data := Mux( + inst_info(1).reg1_ren, + forwardCtrl.out.inst(1).src1.rdata, + decoder(1).io.out.imm32 + ) + io.executeStage.inst1.src_info.src2_data := Mux( + inst_info(1).reg2_ren, + forwardCtrl.out.inst(1).src2.rdata, + decoder(1).io.out.imm32 + ) + io.executeStage.inst1.ex.flush_req := io.executeStage.inst1.ex.excode =/= EX_NO || io.executeStage.inst1.ex.tlb_refill + io.executeStage.inst1.ex.tlb_refill := tlb_refill(1) + io.executeStage.inst1.ex.eret := inst_info(1).op === EXE_ERET + io.executeStage.inst1.ex.badvaddr := pc(1) + io.executeStage.inst1.ex.bd := issue.inst1.is_in_delayslot + val inst1_ex_cpu = + !io.cp0.access_allowed && VecInit(EXE_MFC0, EXE_MTC0, EXE_TLBR, EXE_TLBWI, EXE_TLBWR, EXE_TLBP, EXE_ERET, EXE_WAIT) + .contains(inst_info(1).op) + io.executeStage.inst1.ex.excode := 
// Operand bypass network for the decode stage.
//
// For every decode slot and both source operands, the register-file value is
// replaced by a newer in-flight result when a function unit is about to write
// the same register. Priority, lowest to highest:
//   register file < mem stage (port 0 .. last) < exe stage (port 0 .. last)
// An exe-stage result is skipped while that port's mem_wreg flag is set.
class ForwardCtrl(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val in = Input(new Bundle {
      val forward = Vec(config.fuNum, new DataForwardToDecoderUnit())
      val regfile = Vec(config.decoderNum, new Src12Read())
    })
    val out = Output(new Bundle {
      val inst = Vec(config.decoderNum, new Src12Read())
    })
  })

  // Builds the priority mux chain for one operand. Each fold step wraps the
  // previous candidate, so later ports (and the later fold) win on a match.
  // NOTE(review): there is no special case for register 0 here - confirm that
  // wen is never asserted for $0 upstream.
  private def bypass(raddr: UInt, regfileData: UInt): UInt = {
    val memResolved = io.in.forward.foldLeft(regfileData) { (older, port) =>
      Mux(port.mem.wen && port.mem.waddr === raddr, port.mem.wdata, older)
    }
    io.in.forward.foldLeft(memResolved) { (older, port) =>
      Mux(port.exe.wen && !port.mem_wreg && port.exe.waddr === raddr, port.exe.wdata, older)
    }
  }

  for ((slotOut, slotIn) <- io.out.inst.zip(io.in.regfile)) {
    // Only the data side of the output bundle carries information.
    slotOut.src1.raddr := DontCare
    slotOut.src2.raddr := DontCare
    slotOut.src1.rdata := bypass(slotIn.src1.raddr, slotIn.src1.rdata)
    slotOut.src2.rdata := bypass(slotIn.src2.raddr, slotIn.src2.rdata)
  }
}
// Resolves unconditional jumps (J, JAL, JR, JALR) for decode slot 0.
//
// Inputs
//   allow_to_go   - decode stage may advance this cycle
//   pc            - address of the instruction in slot 0
//   decoded_inst0 - decoder output for slot 0
//   reg1_data     - register-file value of rs (used as JR/JALR target)
//   forward       - in-flight register writes of every function unit
// Outputs
//   jump_inst     - slot 0 holds one of J/JAL/JR/JALR
//   jump_register - JR/JALR whose rs is still being produced in exe or mem;
//                   the jump is not taken in decode in that case
//   jump          - redirect the fetch unit to jump_target this cycle
//   jump_target   - computed target address
class JumpCtrl(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val in = Input(new Bundle {
      val allow_to_go   = Bool()
      val pc            = UInt(PC_WID.W)
      val decoded_inst0 = new InstInfo()
      val reg1_data     = UInt(DATA_WID.W)
      val forward       = Vec(config.fuNum, new DataForwardToDecoderUnit())
    })
    val out = Output(new Bundle {
      val jump_inst     = Bool()
      val jump_register = Bool()
      val jump          = Bool()
      val jump_target   = UInt(PC_WID.W)
    })
  })

  val op                 = io.in.decoded_inst0.op
  val rs                 = io.in.decoded_inst0.reg1_raddr
  val jump_inst          = VecInit(EXE_J, EXE_JAL).contains(op)
  val jump_register_inst = VecInit(EXE_JR, EXE_JALR).contains(op)

  // rs is "pending" when any function unit has a write to it in flight.
  // Every forward port is checked (not just ports 0 and 1) so the hazard
  // detection stays correct for any config.fuNum.
  val rs_pending = io.in.forward
    .map(port => (port.exe.wen && rs === port.exe.waddr) || (port.mem.wen && rs === port.mem.waddr))
    .foldLeft(false.B)(_ || _)

  io.out.jump_inst     := jump_inst || jump_register_inst
  io.out.jump_register := jump_register_inst && rs_pending
  // A register jump with a pending rs cannot be resolved here.
  io.out.jump := io.in.allow_to_go && (jump_inst || jump_register_inst && !io.out.jump_register)

  // J/JAL: the 26-bit index, shifted left by two, replaces the low 28 bits of
  // PC + 4. JR/JALR: the target is the value read from rs.
  val pc_plus_4 = io.in.pc + 4.U(PC_WID.W)
  io.out.jump_target := Mux(
    jump_inst,
    Cat(pc_plus_4(31, 28), io.in.decoded_inst0.inst(25, 0), 0.U(2.W)),
    io.in.reg1_data
  )
}
for (i <- 0 until 32) { + when(!src1(i)) { + clo := (31 - i).U + }.otherwise { + clz := (31 - i).U + } + } + + val hilo = io.hilo.rdata + + io.hilo.wdata := MuxLookup(op, 0.U)( + Seq( + EXE_MTHI -> Cat(src1, hilo(31, 0)), + EXE_MTLO -> Cat(hilo(63, 32), src1), + EXE_MULT -> Mux(io.mul.ready, io.mul.result, 0.U), + EXE_MULTU -> Mux(io.mul.ready, io.mul.result, 0.U), + EXE_MADD -> Mux(io.mul.ready, hilo + io.mul.result, 0.U), + EXE_MADDU -> Mux(io.mul.ready, hilo + io.mul.result, 0.U), + EXE_MSUB -> Mux(io.mul.ready, hilo - io.mul.result, 0.U), + EXE_MSUBU -> Mux(io.mul.ready, hilo - io.mul.result, 0.U), + EXE_DIV -> Mux(io.div.ready, io.div.result, 0.U), + EXE_DIVU -> Mux(io.div.ready, io.div.result, 0.U) + ) + ) + + io.mul.signed := VecInit(EXE_MULT, EXE_MUL, EXE_MADD, EXE_MSUB).contains(op) + io.mul.en := Mux( + VecInit(EXE_MUL, EXE_MULT, EXE_MULTU, EXE_MADD, EXE_MSUB, EXE_MADDU, EXE_MSUBU).contains(op), + !io.mul.ready, + false.B + ) + io.div.signed := VecInit(EXE_DIV).contains(op) + io.div.en := Mux(VecInit(EXE_DIV, EXE_DIVU).contains(op), !io.div.ready, false.B) + + io.result := MuxLookup(op, 0.U)( + Seq( + // 算数指令 + EXE_ADD -> sum, + EXE_ADDU -> sum, + EXE_SUB -> diff, + EXE_SUBU -> diff, + EXE_SLT -> slt, + EXE_SLTU -> sltu, + // 逻辑指令 + EXE_AND -> (src1 & src2), + EXE_OR -> (src1 | src2), + EXE_NOR -> (~(src1 | src2)), + EXE_XOR -> (src1 ^ src2), + // 移位指令 + EXE_SLL -> (src2 << src1(4, 0)), + EXE_SRL -> (src2 >> src1(4, 0)), + EXE_SRA -> ((src2.asSInt >> src1(4, 0)).asUInt), + // 数据移动指令 + EXE_MFHI -> io.hilo.rdata(63, 32), + EXE_MFLO -> io.hilo.rdata(31, 0), + EXE_MFC0 -> io.cp0_rdata, + EXE_MOVN -> src1, + EXE_MOVZ -> src1, + // 前导记数指令 + EXE_CLZ -> clz, + EXE_CLO -> clo, + // 特殊指令 + EXE_SC -> io.llbit, + // 乘除法 + EXE_MUL -> Mux(io.mul.ready, io.mul.result(31, 0), 0.U), + EXE_MULT -> Mux(io.mul.ready, io.mul.result(31, 0), 0.U), + EXE_MULTU -> Mux(io.mul.ready, io.mul.result(31, 0), 0.U) + ) + ) + + io.overflow := MuxLookup(op, false.B)( + Seq( + EXE_ADD -> 
// Evaluates conditional branches in the execute stage and compares the
// outcome with the prediction.
//
//   out.branch    - the branch condition holds (false for non-branch ops)
//   out.pred_fail - predicted direction differs from the actual one
class BranchCtrl extends Module {
  val io = IO(new Bundle {
    val in = new Bundle {
      val inst_info   = Input(new InstInfo())
      val src_info    = Input(new SrcInfo())
      val pred_branch = Input(Bool())
    }
    val out = new Bundle {
      val branch    = Output(Bool())
      val pred_fail = Output(Bool())
    }
  })

  val lhs = io.in.src_info.src1_data
  val rhs = io.in.src_info.src2_data

  // Sign and zero tests on the first operand, shared by the compare-to-zero branches.
  val lhsNegative = lhs(31)
  val lhsZero     = lhs === 0.U

  val taken = MuxLookup(io.in.inst_info.op, false.B)(
    Seq(
      EXE_BEQ    -> (lhs === rhs),
      EXE_BNE    -> (lhs =/= rhs),
      EXE_BGTZ   -> (!lhsNegative && !lhsZero),
      EXE_BLEZ   -> (lhsNegative || lhsZero),
      EXE_BGEZ   -> !lhsNegative,
      EXE_BGEZAL -> !lhsNegative,
      EXE_BLTZ   -> lhsNegative,
      EXE_BLTZAL -> lhsNegative
    )
  )

  io.out.branch    := taken
  io.out.pred_fail := io.in.pred_branch =/= taken
}
// Port bundle between the execute unit and CP0 (directions from CP0's side).
class Cp0ExecuteUnit(implicit val config: CpuConfig) extends Bundle {
  val in = Input(new Bundle {
    // Decoded instruction of each function unit. CP0 takes the MTC0 / TLB
    // operation and write address from entry 0, and the read address
    // (cp0_addr) from every entry.
    val inst_info  = Vec(config.fuNum, new InstInfo())
    // Data written by MTC0.
    val mtc0_wdata = UInt(DATA_WID.W)
  })
  val out = Output(new Bundle {
    // CP0 register value selected by inst_info(i).cp0_addr, one per function unit.
    val cp0_rdata = Vec(config.fuNum, UInt(DATA_WID.W))
    // Snapshot of Cause, Count and Random for debugging.
    val debug     = Output(new Cp0Info())
  })
}
// index register (0,0) + val cp0_index = RegInit(0.U.asTypeOf(new Cp0Index())) + + // random register (1,0) + val random_init = Wire(new Cp0Random()) + random_init := 0.U.asTypeOf(new Cp0Random()) + random_init.random := (TLB_NUM - 1).U + val cp0_random = RegInit(random_init) + + // entrylo0 register (2,0) + val cp0_entrylo0 = RegInit(0.U.asTypeOf(new Cp0EntryLo())) + + // entrylo1 register (3,0) + val cp0_entrylo1 = RegInit(0.U.asTypeOf(new Cp0EntryLo())) + + // context register (4,0) + val cp0_context = RegInit(0.U.asTypeOf(new Cp0Context())) + + // page mask register (5,0) + val cp0_pagemask = 0.U + + // wired register (6,0) + val cp0_wired = RegInit(0.U.asTypeOf(new Cp0Wired())) + + // badvaddr register (8,0) + val cp0_badvaddr = RegInit(0.U.asTypeOf(new Cp0BadVAddr())) + + // count register (9,0) + val count_init = Wire(new Cp0Count()) + count_init := 0.U.asTypeOf(new Cp0Count()) + count_init.count := 1.U + val cp0_count = RegInit(count_init) + + // entryhi register (10,0) + val cp0_entryhi = RegInit(0.U.asTypeOf(new Cp0EntryHi())) + + // compare register (11,0) + val cp0_compare = RegInit(0.U.asTypeOf(new Cp0Compare())) + + // status register (12,0) + val status_init = Wire(new Cp0Status()) + status_init := 0.U.asTypeOf(new Cp0Status()) + status_init.bev := true.B + val cp0_status = RegInit(status_init) + + // cause register (13,0) + val cp0_cause = RegInit(0.U.asTypeOf(new Cp0Cause())) + + // epc register (14,0) + val cp0_epc = RegInit(0.U.asTypeOf(new Cp0Epc())) + + // prid register (15,0) + val prid = "h_0001_8003".U + + // ebase register (15,1) + val ebase_init = Wire(new Cp0Ebase()) + ebase_init := 0.U.asTypeOf(new Cp0Ebase()) + ebase_init.fill := true.B + val cp0_ebase = RegInit(ebase_init) + + // config register (16,0) + val cp0_config = Wire(new Cp0Config()) + cp0_config := 0.U.asTypeOf(new Cp0Config()) + cp0_config.k0 := 3.U + cp0_config.mt := 1.U + cp0_config.m := true.B + + // config1 register (16,1) + val cp0_config1 = Wire(new Cp0Config1()) + 
cp0_config1 := 0.U.asTypeOf(new Cp0Config1()) + cp0_config1.il := 5.U + cp0_config1.ia := 1.U + cp0_config1.dl := 5.U + cp0_config1.da := 1.U + cp0_config1.ms := (TLB_NUM - 1).U + + // taglo register (28,0) + val cp0_taglo = RegInit(0.U(DATA_WID.W)) + + // taghi register (29,0) + val cp0_taghi = RegInit(0.U(DATA_WID.W)) + + // error epc register (30,0) + val cp0_error_epc = RegInit(0.U.asTypeOf(new Cp0Epc())) + + tlb_l2.in.write.en := !exe_stall && (exe_op === EXE_TLBWI || exe_op === EXE_TLBWR) + tlb_l2.in.write.index := Mux(exe_op === EXE_TLBWI, cp0_index.index, cp0_random.random) + tlb_l2.in.write.entry.asid := cp0_entryhi.asid + tlb_l2.in.write.entry.vpn2 := cp0_entryhi.vpn2 + tlb_l2.in.write.entry.g := cp0_entrylo0.g || cp0_entrylo1.g + tlb_l2.in.write.entry.pfn(0) := cp0_entrylo0.pfn + tlb_l2.in.write.entry.pfn(1) := cp0_entrylo1.pfn + tlb_l2.in.write.entry.c(0) := cp0_entrylo0.c + tlb_l2.in.write.entry.c(1) := cp0_entrylo1.c + tlb_l2.in.write.entry.d(0) := cp0_entrylo0.d + tlb_l2.in.write.entry.d(1) := cp0_entrylo1.d + tlb_l2.in.write.entry.v(0) := cp0_entrylo0.v + tlb_l2.in.write.entry.v(1) := cp0_entrylo1.v + tlb_l2.in.entry_hi.asid := cp0_entryhi.asid + tlb_l2.in.entry_hi.vpn2 := cp0_entryhi.vpn2 + tlb_l2.in.read.index := cp0_index.index + + // index register (0,0) + when(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_INDEX_ADDR) { + cp0_index.index := mtc0_wdata(log2Ceil(TLB_NUM) - 1, 0) + }.elsewhen(exe_op === EXE_TLBP) { + cp0_index.index := Mux(tlb_l2.out.tlb_found, tlb_l2.out.tlb_match_index, cp0_index.index) + cp0_index.p := !tlb_l2.out.tlb_found + } + } + + // random register (1,0) + cp0_random.random := Mux(cp0_random.random === cp0_wired.wired, (TLB_NUM - 1).U, (cp0_random.random - 1.U)) + + // entrylo0 register (2,0) + when(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_ENTRYLO0_ADDR) { + val wdata = mtc0_wdata.asTypeOf(new Cp0EntryLo()) + cp0_entrylo0.pfn := wdata.pfn + cp0_entrylo0.c := wdata.c + cp0_entrylo0.d := wdata.d + 
cp0_entrylo0.v := wdata.v + cp0_entrylo0.g := wdata.g + }.elsewhen(exe_op === EXE_TLBR) { + cp0_entrylo0.pfn := tlb_l2.out.read.entry.pfn(0) + cp0_entrylo0.g := tlb_l2.out.read.entry.g + cp0_entrylo0.c := Cat(1.U((C_WID - 1).W), tlb_l2.out.read.entry.c(0)) + cp0_entrylo0.d := tlb_l2.out.read.entry.d(0) + cp0_entrylo0.v := tlb_l2.out.read.entry.v(0) + } + } + + // entrylo1 register (3,0) + when(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_ENTRYLO1_ADDR) { + val wdata = mtc0_wdata.asTypeOf(new Cp0EntryLo()) + cp0_entrylo1.pfn := wdata.pfn + cp0_entrylo1.c := wdata.c + cp0_entrylo1.d := wdata.d + cp0_entrylo1.v := wdata.v + cp0_entrylo1.g := wdata.g + }.elsewhen(exe_op === EXE_TLBR) { + cp0_entrylo1.pfn := tlb_l2.out.read.entry.pfn(1) + cp0_entrylo1.g := tlb_l2.out.read.entry.g + cp0_entrylo1.c := Cat(1.U((C_WID - 1).W), tlb_l2.out.read.entry.c(1)) + cp0_entrylo1.d := tlb_l2.out.read.entry.d(1) + cp0_entrylo1.v := tlb_l2.out.read.entry.v(1) + } + } + + // context register (4,0) + when(!mem_stall && ex.flush_req) { + when(VecInit(EX_TLBL, EX_TLBS, EX_MOD).contains(ex.excode)) { + cp0_context.badvpn2 := ex.badvaddr(31, 13) + } + }.elsewhen(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_CONTEXT_ADDR) { + cp0_context.ptebase := mtc0_wdata.asTypeOf(new Cp0Context()).ptebase + } + } + + // wired register (6,0) + when(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_WIRED_ADDR) { + cp0_wired.wired := mtc0_wdata.asTypeOf(new Cp0Wired()).wired + cp0_random.random := (TLB_NUM - 1).U + } + } + + // badvaddr register (8,0) + when(!mem_stall && ex.flush_req) { + when(VecInit(EX_ADEL, EX_TLBL, EX_ADES, EX_TLBS, EX_MOD).contains(ex.excode)) { + cp0_badvaddr.badvaddr := ex.badvaddr + } + } + + // count register (9,0) + val tick = RegInit(false.B) + tick := !tick + when(tick) { + cp0_count.count := cp0_count.count + 1.U + } + when(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_COUNT_ADDR) { + cp0_count.count := mtc0_wdata.asTypeOf(new Cp0Count()).count + } + } + + // 
entryhi register (10,0) + when(!mem_stall && ex.flush_req) { + when(VecInit(EX_TLBL, EX_TLBS, EX_MOD).contains(ex.excode)) { + cp0_entryhi.vpn2 := ex.badvaddr(31, 13) + } + }.elsewhen(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_ENTRYHI_ADDR) { + val wdata = mtc0_wdata.asTypeOf(new Cp0EntryHi()) + cp0_entryhi.asid := wdata.asid + cp0_entryhi.vpn2 := wdata.vpn2 + } + } + + // compare register (11,0) + when(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_COMPARE_ADDR) { + cp0_compare.compare := mtc0_wdata.asTypeOf(new Cp0Compare()).compare + } + } + + // status register (12,0) + when(!mem_stall && ex.eret) { + when(cp0_status.erl) { + cp0_status.erl := false.B + }.otherwise { + cp0_status.exl := false.B + } + }.elsewhen(!mem_stall && ex.flush_req) { + cp0_status.exl := true.B + }.elsewhen(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_STATUS_ADDR) { + val wdata = mtc0_wdata.asTypeOf(new Cp0Status()) + cp0_status.cu0 := wdata.cu0 + cp0_status.ie := wdata.ie + cp0_status.exl := wdata.exl + cp0_status.erl := wdata.erl + cp0_status.um := wdata.um + cp0_status.im := wdata.im + cp0_status.bev := wdata.bev + } + } + + // cause register (13,0) + cp0_cause.ip := Cat( + cp0_cause.ip(7) || cp0_compare.compare === cp0_count.count || io.ext_int(5), // TODO:此处的ext_int可能不对 + io.ext_int(4, 0), + cp0_cause.ip(1, 0) + ) + when(!mem_stall && ex.flush_req && !ex.eret) { + when(!cp0_status.exl) { + cp0_cause.bd := ex.bd + } + cp0_cause.excode := MuxLookup(ex.excode, cp0_cause.excode)( + Seq( + EX_NO -> EXC_NO, + EX_INT -> EXC_INT, + EX_MOD -> EXC_MOD, + EX_TLBL -> EXC_TLBL, + EX_TLBS -> EXC_TLBS, + EX_ADEL -> EXC_ADEL, + EX_ADES -> EXC_ADES, + EX_SYS -> EXC_SYS, + EX_BP -> EXC_BP, + EX_RI -> EXC_RI, + EX_CPU -> EXC_CPU, + EX_OV -> EXC_OV + ) + ) + }.elsewhen(!exe_stall) { + when(mtc0_wen) { + when(mtc0_addr === CP0_COMPARE_ADDR) { + cp0_cause.ip := Cat(false.B, cp0_cause.ip(6, 0)) + }.elsewhen(mtc0_addr === CP0_CAUSE_ADDR) { + val wdata = mtc0_wdata.asTypeOf(new Cp0Cause()) 
+ cp0_cause.ip := Cat( + cp0_cause.ip(7, 2), + wdata.ip(1, 0) + ) + cp0_cause.iv := wdata.iv + } + } + } + + // epc register (14,0) + when(!mem_stall && ex.flush_req) { + when(!cp0_status.exl) { + cp0_epc.epc := Mux(ex.bd, pc - 4.U, pc) + } + }.elsewhen(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_EPC_ADDR) { + cp0_epc.epc := mtc0_wdata.asTypeOf(new Cp0Epc()).epc + } + } + + // ebase register (15,1) + when(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_EBASE_ADDR) { + cp0_ebase.ebase := mtc0_wdata.asTypeOf(new Cp0Ebase()).ebase + } + } + + // taglo register (28,0) + when(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_TAGLO_ADDR) { + cp0_taglo := mtc0_wdata + } + } + + // taghi register (29,0) + when(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_TAGHI_ADDR) { + cp0_taghi := mtc0_wdata + } + } + + // error epc register (30,0) + when(!exe_stall) { + when(mtc0_wen && mtc0_addr === CP0_ERROR_EPC_ADDR) { + cp0_error_epc.epc := mtc0_wdata.asTypeOf(new Cp0Epc()).epc + } + } + + for (i <- 0 until config.fuNum) { + io.executeUnit.out.cp0_rdata(i) := MuxLookup(io.executeUnit.in.inst_info(i).cp0_addr, 0.U)( + Seq( + CP0_INDEX_ADDR -> cp0_index.asUInt, + CP0_RANDOM_ADDR -> cp0_random.asUInt, + CP0_ENTRYLO0_ADDR -> cp0_entrylo0.asUInt, + CP0_ENTRYLO1_ADDR -> cp0_entrylo1.asUInt, + CP0_CONTEXT_ADDR -> cp0_context.asUInt, + CP0_PAGE_MASK_ADDR -> cp0_pagemask, + CP0_WIRED_ADDR -> cp0_wired.asUInt, + CP0_BADV_ADDR -> cp0_badvaddr.asUInt, + CP0_COUNT_ADDR -> cp0_count.asUInt, + CP0_ENTRYHI_ADDR -> cp0_entryhi.asUInt, + CP0_COMPARE_ADDR -> cp0_compare.asUInt, + CP0_STATUS_ADDR -> cp0_status.asUInt, + CP0_CAUSE_ADDR -> cp0_cause.asUInt, + CP0_EPC_ADDR -> cp0_epc.asUInt, + CP0_PRID_ADDR -> prid, + CP0_EBASE_ADDR -> cp0_ebase.asUInt, + CP0_CONFIG_ADDR -> cp0_config.asUInt, + CP0_CONFIG1_ADDR -> cp0_config1.asUInt, + CP0_TAGLO_ADDR -> cp0_taglo, + CP0_TAGHI_ADDR -> cp0_taghi, + CP0_ERROR_EPC_ADDR -> cp0_error_epc.asUInt + ) + ) + } + io.decoderUnit.cause_ip := 
cp0_cause.ip + io.decoderUnit.status_im := cp0_status.im + io.decoderUnit.kernel_mode := (cp0_status.exl && !(ex.eret && cp0_status.erl)) || + (cp0_status.erl && !ex.eret) || + !cp0_status.um || + (ex.flush_req && !ex.eret) + io.decoderUnit.access_allowed := io.decoderUnit.kernel_mode || cp0_status.cu0 + io.decoderUnit.intterupt_allowed := cp0_status.ie && !cp0_status.exl && !cp0_status.erl + + io.executeUnit.out.debug.cp0_cause := cp0_cause.asUInt + io.executeUnit.out.debug.cp0_count := cp0_count.asUInt + io.executeUnit.out.debug.cp0_random := cp0_random.asUInt + + val trap_base = Mux( + cp0_status.bev, + "hbfc00200".U(PC_WID.W), + cp0_ebase.asUInt + ) + io.memoryUnit.out.flush := false.B + io.memoryUnit.out.flush_pc := 0.U + when(ex.eret) { + io.memoryUnit.out.flush := true.B && !io.ctrl.mem_stall + io.memoryUnit.out.flush_pc := Mux(cp0_status.erl, cp0_error_epc.epc, cp0_epc.epc) + }.elsewhen(ex.flush_req) { + io.memoryUnit.out.flush := true.B && !io.ctrl.mem_stall + io.memoryUnit.out.flush_pc := Mux( + cp0_status.exl, + trap_base + "h180".U, + trap_base + Mux( + ex.excode === EX_INT && cp0_cause.iv && !cp0_status.bev, + "h200".U, + Mux(ex.tlb_refill && ex.excode =/= EX_INT, 0.U, "h180".U) + ) + ) + } +} diff --git a/chisel/playground/src/pipeline/execute/Div.scala b/chisel/playground/src/pipeline/execute/Div.scala new file mode 100644 index 0000000..9f22efc --- /dev/null +++ b/chisel/playground/src/pipeline/execute/Div.scala @@ -0,0 +1,160 @@ +package cpu.pipeline.execute + +import chisel3._ +import chisel3.util._ +import cpu.defines._ +import cpu.defines.Const._ +import cpu.CpuConfig + +class SignedDiv extends BlackBox with HasBlackBoxResource { + val io = IO(new Bundle { + val aclk = Input(Clock()) + // 除数 + val s_axis_divisor_tvalid = Input(Bool()) + val s_axis_divisor_tready = Output(Bool()) + val s_axis_divisor_tdata = Input(UInt(DATA_WID.W)) + // 被除数 + val s_axis_dividend_tvalid = Input(Bool()) + val s_axis_dividend_tready = Output(Bool()) + val 
s_axis_dividend_tdata = Input(UInt(DATA_WID.W)) + // 结果 + val m_axis_dout_tvalid = Output(Bool()) + val m_axis_dout_tdata = Output(UInt(HILO_WID.W)) + }) +} + +class UnsignedDiv extends BlackBox with HasBlackBoxResource { + val io = IO(new Bundle { + val aclk = Input(Clock()) + // 除数 + val s_axis_divisor_tvalid = Input(Bool()) + val s_axis_divisor_tready = Output(Bool()) + val s_axis_divisor_tdata = Input(UInt(DATA_WID.W)) + // 被除数 + val s_axis_dividend_tvalid = Input(Bool()) + val s_axis_dividend_tready = Output(Bool()) + val s_axis_dividend_tdata = Input(UInt(DATA_WID.W)) + // 结果 + val m_axis_dout_tvalid = Output(Bool()) + val m_axis_dout_tdata = Output(UInt(HILO_WID.W)) + }) +} + +class Div(implicit config: CpuConfig) extends Module { + val io = IO(new Bundle { + val src1 = Input(UInt(DATA_WID.W)) + val src2 = Input(UInt(DATA_WID.W)) + val signed = Input(Bool()) + val start = Input(Bool()) + val allow_to_go = Input(Bool()) + + val ready = Output(Bool()) + val result = Output(UInt(HILO_WID.W)) + }) + + if (config.build) { + val signedDiv = Module(new SignedDiv()).io + val unsignedDiv = Module(new UnsignedDiv()).io + + signedDiv.aclk := clock + unsignedDiv.aclk := clock + + // 0为被除数,1为除数 + val unsignedDiv_sent = Seq.fill(2)(RegInit(false.B)) + val unsignedDiv_done = RegInit(false.B) + val signedDiv_sent = Seq.fill(2)(RegInit(false.B)) + val signedDiv_done = RegInit(false.B) + + when(unsignedDiv.s_axis_dividend_tready && unsignedDiv.s_axis_dividend_tvalid) { + unsignedDiv_sent(0) := true.B + }.elsewhen(io.ready && io.allow_to_go) { + unsignedDiv_sent(0) := false.B + } + when(unsignedDiv.s_axis_divisor_tready && unsignedDiv.s_axis_divisor_tvalid) { + unsignedDiv_sent(1) := true.B + }.elsewhen(io.ready && io.allow_to_go) { + unsignedDiv_sent(1) := false.B + } + + when(signedDiv.s_axis_dividend_tready && signedDiv.s_axis_dividend_tvalid) { + signedDiv_sent(0) := true.B + }.elsewhen(io.ready && io.allow_to_go) { + signedDiv_sent(0) := false.B + } + 
when(signedDiv.s_axis_divisor_tready && signedDiv.s_axis_divisor_tvalid) { + signedDiv_sent(1) := true.B + }.elsewhen(io.ready && io.allow_to_go) { + signedDiv_sent(1) := false.B + } + + when(signedDiv.m_axis_dout_tvalid && !io.allow_to_go) { + signedDiv_done := true.B + }.elsewhen(io.allow_to_go) { + signedDiv_done := false.B + } + + when(unsignedDiv.m_axis_dout_tvalid && !io.allow_to_go) { + unsignedDiv_done := true.B + }.elsewhen(io.allow_to_go) { + unsignedDiv_done := false.B + } + // 被除数和除数的valid信号 + signedDiv.s_axis_dividend_tvalid := io.start && !signedDiv_sent(0) && io.signed + signedDiv.s_axis_divisor_tvalid := io.start && !signedDiv_sent(1) && io.signed + + unsignedDiv.s_axis_dividend_tvalid := io.start && !unsignedDiv_sent(0) && !io.signed + unsignedDiv.s_axis_divisor_tvalid := io.start && !unsignedDiv_sent(1) && !io.signed + + // 被除数和除数的值 + signedDiv.s_axis_dividend_tdata := io.src1 + signedDiv.s_axis_divisor_tdata := io.src2 + + unsignedDiv.s_axis_dividend_tdata := io.src1 + unsignedDiv.s_axis_divisor_tdata := io.src2 + + io.ready := Mux( + io.signed, + signedDiv.m_axis_dout_tvalid || signedDiv_done, + unsignedDiv.m_axis_dout_tvalid || unsignedDiv_done, + ) + val signedRes = + Cat(signedDiv.m_axis_dout_tdata(DATA_WID - 1, 0), signedDiv.m_axis_dout_tdata(HILO_WID - 1, DATA_WID)) + val unsignedRes = + Cat(unsignedDiv.m_axis_dout_tdata(DATA_WID - 1, 0), unsignedDiv.m_axis_dout_tdata(HILO_WID - 1, DATA_WID)) + io.result := Mux(io.signed, signedRes, unsignedRes) + } else { + val cnt = RegInit(0.U(log2Ceil(config.divClockNum + 1).W)) + cnt := MuxCase( + cnt, + Seq( + (io.start && !io.ready) -> (cnt + 1.U), + io.allow_to_go -> 0.U, + ), + ) + + val div_signed = io.signed + + val dividend_signed = io.src1(31) & div_signed + val divisor_signed = io.src2(31) & div_signed + + val dividend_abs = Mux(dividend_signed, (-io.src1).asUInt, io.src1.asUInt) + val divisor_abs = Mux(divisor_signed, (-io.src2).asUInt, io.src2.asUInt) + + val quotient_signed = (io.src1(31) ^ 
io.src2(31)) & div_signed + val remainder_signed = io.src1(31) & div_signed + + val quotient_abs = dividend_abs / divisor_abs + val remainder_abs = dividend_abs - quotient_abs * divisor_abs + + val quotient = RegInit(0.S(32.W)) + val remainder = RegInit(0.S(32.W)) + + when(io.start) { + quotient := Mux(quotient_signed, (-quotient_abs).asSInt, quotient_abs.asSInt) + remainder := Mux(remainder_signed, (-remainder_abs).asSInt, remainder_abs.asSInt) + } + + io.ready := cnt >= config.divClockNum.U + io.result := Cat(remainder, quotient) + } +} diff --git a/chisel/playground/src/pipeline/execute/ExeAccessMemCtrl.scala b/chisel/playground/src/pipeline/execute/ExeAccessMemCtrl.scala new file mode 100644 index 0000000..f4bdb3a --- /dev/null +++ b/chisel/playground/src/pipeline/execute/ExeAccessMemCtrl.scala @@ -0,0 +1,99 @@ +package cpu.pipeline.execute + +import chisel3._ +import chisel3.util._ +import cpu.CpuConfig +import cpu.defines._ +import cpu.defines.Const._ + +class ExeAccessMemCtrl(implicit val config: CpuConfig) extends Module { + val io = IO(new Bundle { + val mem = new Bundle { + val out = Output(new Bundle { + val en = Bool() + val ren = Bool() + val wen = Bool() + val inst_info = new InstInfo() + val addr = UInt(DATA_ADDR_WID.W) + val wdata = UInt(DATA_WID.W) + }) + } + + val inst = Vec( + config.fuNum, + new Bundle { + val inst_info = Input(new InstInfo()) + val src_info = Input(new SrcInfo()) + val ex = new Bundle { + val in = Input(new ExceptionInfo()) + val out = Output(new ExceptionInfo()) + } + val mem_sel = Output(Bool()) + }, + ) + }) + io.mem.out.en := io.inst.map(_.mem_sel).reduce(_ || _) + io.mem.out.ren := io.inst(0).mem_sel && io.inst(0).inst_info.rmem || + io.inst(1).mem_sel && io.inst(1).inst_info.rmem + io.mem.out.wen := io.inst(0).mem_sel && io.inst(0).inst_info.wmem || + io.inst(1).mem_sel && io.inst(1).inst_info.wmem + io.mem.out.inst_info := MuxCase( + DontCare, + Seq( + (io.inst(0).inst_info.fusel === FU_MEM) -> io.inst(0).inst_info, + 
(io.inst(1).inst_info.fusel === FU_MEM) -> io.inst(1).inst_info, + ), + ) + val mem_addr = Wire(Vec(config.fuNum, UInt(DATA_ADDR_WID.W))) + mem_addr(0) := io.inst(0).inst_info.mem_addr + mem_addr(1) := io.inst(1).inst_info.mem_addr + io.mem.out.addr := MuxCase( + 0.U, + Seq( + (io.inst(0).inst_info.fusel === FU_MEM) -> mem_addr(0), + (io.inst(1).inst_info.fusel === FU_MEM) -> mem_addr(1), + ), + ) + io.mem.out.wdata := MuxCase( + 0.U, + Seq( + (io.inst(0).inst_info.fusel === FU_MEM) -> + io.inst(0).src_info.src2_data, + (io.inst(1).inst_info.fusel === FU_MEM) -> + io.inst(1).src_info.src2_data, + ), + ) + val mem_adel = Wire(Vec(config.fuNum, Bool())) + for (i <- 0 until config.fuNum) { + mem_adel(i) := VecInit(EXE_LW, EXE_LL).contains(io.inst(i).inst_info.op) && mem_addr(i)(1, 0) =/= 0.U || + VecInit(EXE_LH, EXE_LHU).contains(io.inst(i).inst_info.op) && mem_addr(i)(0) =/= 0.U + } + val mem_ades = Wire(Vec(config.fuNum, Bool())) + for (i <- 0 until config.fuNum) { + mem_ades(i) := VecInit(EXE_SW, EXE_SC).contains(io.inst(i).inst_info.op) && mem_addr(i)(1, 0) =/= 0.U || + io.inst(i).inst_info.op === EXE_SH && mem_addr(i)(0) =/= 0.U + } + + for (i <- 0 until config.fuNum) { + io.inst(i).ex.out := io.inst(i).ex.in + io.inst(i).ex.out.excode := MuxCase( + io.inst(i).ex.in.excode, + Seq( + (io.inst(i).ex.in.excode =/= EX_NO) -> io.inst(i).ex.in.excode, + mem_adel(i) -> EX_ADEL, + mem_ades(i) -> EX_ADES, + ), + ) + io.inst(i).ex.out.badvaddr := Mux( + VecInit(EX_ADEL, EX_ADES).contains(io.inst(i).ex.in.excode), + io.inst(i).ex.in.badvaddr, + mem_addr(i), + ) + io.inst(i).ex.out.flush_req := io.inst(i).ex.in.flush_req || io.inst(i).ex.out.excode =/= EX_NO + } + io.inst(0).mem_sel := (io.inst(0).inst_info.wmem || io.inst(0).inst_info.rmem) && + !io.inst(0).ex.out.flush_req + io.inst(1).mem_sel := (io.inst(1).inst_info.wmem || io.inst(1).inst_info.rmem) && + !io.inst(0).ex.out.flush_req && !io.inst(1).ex.out.flush_req + +} diff --git 
// ---------------------------------------------------------------------------
// chisel/playground/src/pipeline/execute/ExecuteStage.scala
// ---------------------------------------------------------------------------
package cpu.pipeline.execute

import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
import cpu.{CpuConfig, BranchPredictorConfig}

/** Payload latched for issue slot 0 between the decoder unit and the execute unit. */
class IdExeInst0 extends Bundle {
  val config    = new BranchPredictorConfig()
  val pc        = UInt(PC_WID.W)
  val inst_info = new InstInfo()
  val src_info  = new SrcInfo()
  val ex        = new ExceptionInfo()
  val jb_info = new Bundle {
    // jump ctrl
    val jump_regiser = Bool()
    // bpu
    val branch_inst      = Bool()
    val pred_branch      = Bool()
    val branch_target    = UInt(PC_WID.W)
    val update_pht_index = UInt(config.phtDepth.W)
  }
}

/** Payload latched for issue slot 1; `allow_to_go` doubles as its latch enable. */
class IdExeInst1 extends Bundle {
  val allow_to_go = Bool()
  val pc          = UInt(PC_WID.W)
  val inst_info   = new InstInfo()
  val src_info    = new SrcInfo()
  val ex          = new ExceptionInfo()
}

/** Both issue slots as passed from the decoder unit to the execute unit. */
class DecoderUnitExecuteUnit extends Bundle {
  val inst0 = new IdExeInst0()
  val inst1 = new IdExeInst1()
}

/** Pipeline register between the decoder unit and the execute unit.
  *
  * Each slot is overwritten with an all-zero bundle when its `clear` bit is
  * set; otherwise it captures the decoder output when its enable is high
  * (`ctrl.inst0_allow_to_go` for slot 0, `decoderUnit.inst1.allow_to_go` for
  * slot 1) and holds its value in every other cycle.
  */
class ExecuteStage(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val ctrl = Input(new Bundle {
      val inst0_allow_to_go = Bool()
      val clear             = Vec(config.decoderNum, Bool())
    })
    val decoderUnit = Input(new DecoderUnitExecuteUnit())
    val executeUnit = Output(new DecoderUnitExecuteUnit())
  })

  // All-zero slot contents, used both as reset value and as the clear value.
  private def zeroInst0 = 0.U.asTypeOf(new IdExeInst0())
  private def zeroInst1 = 0.U.asTypeOf(new IdExeInst1())

  val inst0 = RegInit(zeroInst0)
  val inst1 = RegInit(zeroInst1)

  // Last connection wins, so writing the clear after the capture gives the
  // clear priority over the latch enable.
  when(io.ctrl.inst0_allow_to_go) {
    inst0 := io.decoderUnit.inst0
  }
  when(io.ctrl.clear(0)) {
    inst0 := zeroInst0
  }

  when(io.decoderUnit.inst1.allow_to_go) {
    inst1 := io.decoderUnit.inst1
  }
  when(io.ctrl.clear(1)) {
    inst1 := zeroInst1
  }

  io.executeUnit.inst0 := inst0
  io.executeUnit.inst1 := inst1
}
// ---------------------------------------------------------------------------
// chisel/playground/src/pipeline/execute/ExecuteUnit.scala
// ---------------------------------------------------------------------------
package cpu.pipeline.execute

import chisel3._
import chisel3.util._
import cpu.CpuConfig
import cpu.defines._
import cpu.defines.Const._
import cpu.pipeline.decoder.RegWrite
import cpu.pipeline.memory.{ExecuteUnitMemoryUnit, Cp0Info}
import cpu.pipeline.fetch.ExecuteUnitBranchPredictor

/** Execute stage top level.
  *
  * Wires the latched decoder output (`io.executeStage`) into [[Fu]] and
  * [[ExeAccessMemCtrl]], produces the branch redirect for the fetch unit,
  * the predictor update, the forwarding data for the decoder and the payload
  * for the memory stage.
  */
class ExecuteUnit(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val ctrl         = new ExecuteCtrl()
    val executeStage = Input(new DecoderUnitExecuteUnit())
    val cp0          = Flipped(new Cp0ExecuteUnit())
    val bpu          = new ExecuteUnitBranchPredictor()
    val fetchUnit = Output(new Bundle {
      val branch = Bool()
      val target = UInt(PC_WID.W)
    })
    val decoderUnit = new Bundle {
      val forward = Output(
        Vec(
          config.fuNum,
          new Bundle {
            val exe          = new RegWrite()
            val exe_mem_wreg = Bool()
          },
        ),
      )
      val inst0_bd = Input(Bool())
    }
    val memoryStage = Output(new ExecuteUnitMemoryUnit())

    val statistic = if (!config.build) Some(new BranchPredictorUnitStatistic()) else None
  })

  val fu            = Module(new Fu()).io
  val accessMemCtrl = Module(new ExeAccessMemCtrl()).io

  // Redirect the front end when slot 0 is a register jump or a mispredicted
  // branch and the stage is allowed to advance. The control unit and the
  // fetch unit receive the same condition.
  val redirect = io.ctrl.allow_to_go &&
    (io.executeStage.inst0.jb_info.jump_regiser || fu.branch.pred_fail)

  io.ctrl.inst(0).mem_wreg  := io.executeStage.inst0.inst_info.mem_wreg
  io.ctrl.inst(0).reg_waddr := io.executeStage.inst0.inst_info.reg_waddr
  io.ctrl.inst(1).mem_wreg  := io.executeStage.inst1.inst_info.mem_wreg
  io.ctrl.inst(1).reg_waddr := io.executeStage.inst1.inst_info.reg_waddr
  io.ctrl.branch            := redirect

  io.cp0.in.mtc0_wdata := io.executeStage.inst0.src_info.src2_data
  // Slot 0 is presented to CP0 as an all-zero instruction when it already
  // carries a flush request.
  io.cp0.in.inst_info(0) := Mux(
    !io.executeStage.inst0.ex.flush_req,
    io.executeStage.inst0.inst_info,
    0.U.asTypeOf(new InstInfo()),
  )
  io.cp0.in.inst_info(1) := io.executeStage.inst1.inst_info

  // input accessMemCtrl
  accessMemCtrl.inst(0).inst_info := io.executeStage.inst0.inst_info
  accessMemCtrl.inst(0).src_info  := io.executeStage.inst0.src_info
  accessMemCtrl.inst(0).ex.in     := io.executeStage.inst0.ex
  accessMemCtrl.inst(1).inst_info := io.executeStage.inst1.inst_info
  accessMemCtrl.inst(1).src_info  := io.executeStage.inst1.src_info
  accessMemCtrl.inst(1).ex.in     := io.executeStage.inst1.ex

  // input fu
  fu.ctrl <> io.ctrl.fu
  fu.inst(0).pc        := io.executeStage.inst0.pc
  fu.inst(0).hilo_wen  := io.executeStage.inst0.inst_info.whilo
  fu.inst(0).mul_en    := io.executeStage.inst0.inst_info.mul
  fu.inst(0).div_en    := io.executeStage.inst0.inst_info.div
  fu.inst(0).inst_info := io.executeStage.inst0.inst_info
  fu.inst(0).src_info  := io.executeStage.inst0.src_info
  fu.inst(0).ex.in :=
    Mux(io.executeStage.inst0.inst_info.fusel === FU_MEM, accessMemCtrl.inst(0).ex.out, io.executeStage.inst0.ex)
  fu.inst(1).pc         := io.executeStage.inst1.pc
  fu.inst(1).hilo_wen   := io.executeStage.inst1.inst_info.whilo
  fu.inst(1).mul_en     := io.executeStage.inst1.inst_info.mul
  fu.inst(1).div_en     := io.executeStage.inst1.inst_info.div
  fu.inst(1).inst_info  := io.executeStage.inst1.inst_info
  fu.inst(1).src_info   := io.executeStage.inst1.src_info
  fu.inst(1).ex.in      := io.executeStage.inst1.ex
  fu.cp0_rdata          := io.cp0.out.cp0_rdata
  fu.branch.pred_branch := io.executeStage.inst0.jb_info.pred_branch

  io.bpu.pc               := io.executeStage.inst0.pc
  io.bpu.update_pht_index := io.executeStage.inst0.jb_info.update_pht_index
  io.bpu.branch           := fu.branch.branch
  io.bpu.branch_inst      := io.executeStage.inst0.jb_info.branch_inst

  io.fetchUnit.branch := redirect
  io.fetchUnit.target := MuxCase(
    io.executeStage.inst0.pc + 4.U, // default: continue sequentially
    Seq(
      (fu.branch.pred_fail && fu.branch.branch) -> io.executeStage.inst0.jb_info.branch_target,
      (fu.branch.pred_fail && !fu.branch.branch) -> Mux(
        io.decoderUnit.inst0_bd || io.executeStage.inst1.ex.bd,
        io.executeStage.inst0.pc + 8.U,
        io.executeStage.inst0.pc + 4.U,
      ),
      (io.executeStage.inst0.jb_info.jump_regiser) -> io.executeStage.inst0.src_info.src1_data,
    ),
  )

  io.ctrl.fu_stall := fu.stall_req

  io.memoryStage.inst0.mem.en        := accessMemCtrl.mem.out.en
  io.memoryStage.inst0.mem.ren       := accessMemCtrl.mem.out.ren
  io.memoryStage.inst0.mem.wen       := accessMemCtrl.mem.out.wen
  io.memoryStage.inst0.mem.addr      := accessMemCtrl.mem.out.addr
  io.memoryStage.inst0.mem.wdata     := accessMemCtrl.mem.out.wdata
  io.memoryStage.inst0.mem.sel       := accessMemCtrl.inst.map(_.mem_sel)
  io.memoryStage.inst0.mem.inst_info := accessMemCtrl.mem.out.inst_info
  io.memoryStage.inst0.mem.llbit     := fu.llbit

  io.memoryStage.inst0.pc            := io.executeStage.inst0.pc
  io.memoryStage.inst0.inst_info     := io.executeStage.inst0.inst_info
  io.memoryStage.inst0.rd_info.wdata := fu.inst(0).result
  // Memory instructions take their exception info from the access
  // controller, everything else from the functional unit.
  io.memoryStage.inst0.ex := Mux(
    io.executeStage.inst0.inst_info.fusel === FU_MEM,
    accessMemCtrl.inst(0).ex.out,
    fu.inst(0).ex.out,
  )
  io.memoryStage.inst0.cp0 := io.cp0.out.debug

  io.memoryStage.inst1.pc            := io.executeStage.inst1.pc
  io.memoryStage.inst1.inst_info     := io.executeStage.inst1.inst_info
  io.memoryStage.inst1.rd_info.wdata := fu.inst(1).result
  io.memoryStage.inst1.ex := Mux(
    io.executeStage.inst1.inst_info.fusel === FU_MEM,
    accessMemCtrl.inst(1).ex.out,
    fu.inst(1).ex.out,
  )

  io.decoderUnit.forward(0).exe.wen      := io.memoryStage.inst0.inst_info.reg_wen
  io.decoderUnit.forward(0).exe.waddr    := io.memoryStage.inst0.inst_info.reg_waddr
  io.decoderUnit.forward(0).exe.wdata    := io.memoryStage.inst0.rd_info.wdata
  io.decoderUnit.forward(0).exe_mem_wreg := io.memoryStage.inst0.inst_info.mem_wreg

  io.decoderUnit.forward(1).exe.wen      := io.memoryStage.inst1.inst_info.reg_wen
  io.decoderUnit.forward(1).exe.waddr    := io.memoryStage.inst1.inst_info.reg_waddr
  io.decoderUnit.forward(1).exe.wdata    := io.memoryStage.inst1.rd_info.wdata
  io.decoderUnit.forward(1).exe_mem_wreg := io.memoryStage.inst1.inst_info.mem_wreg

  // ===----------------------------------------------------------------===
  // statistic
  // ===----------------------------------------------------------------===
  if (!config.build) {
    io.statistic.get <> fu.statistic.get
  }
}

// ---------------------------------------------------------------------------
// chisel/playground/src/pipeline/execute/Fu.scala
// ---------------------------------------------------------------------------
package cpu.pipeline.execute

import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
import cpu.CpuConfig

/** Functional units of the execute stage.
  *
  * Instantiates one [[Alu]] per functional-unit slot plus a shared [[Mul]],
  * [[Div]], [[HiLo]] register, `BranchCtrl` and [[LLbit]]. Slot 0 is the
  * only slot connected to the branch controller.
  *
  * Outputs per slot: `result` and `ex.out` (`ex.in` with overflow folded
  * in). `stall_req` is raised while a requested multiply or divide has not
  * reported `ready`.
  */
class Fu(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val ctrl = new ExecuteFuCtrl()
    val inst = Vec(
      config.decoderNum,
      new Bundle {
        val pc        = Input(UInt(PC_WID.W))
        val hilo_wen  = Input(Bool())
        val mul_en    = Input(Bool())
        val div_en    = Input(Bool())
        val inst_info = Input(new InstInfo())
        val src_info  = Input(new SrcInfo())
        val ex = new Bundle {
          val in  = Input(new ExceptionInfo())
          val out = Output(new ExceptionInfo())
        }
        val result = Output(UInt(DATA_WID.W))
      },
    )
    val cp0_rdata = Input(Vec(config.fuNum, UInt(DATA_WID.W)))
    val stall_req = Output(Bool())
    val branch = new Bundle {
      val pred_branch = Input(Bool())
      val branch      = Output(Bool())
      val pred_fail   = Output(Bool())
    }
    val llbit = Output(Bool())

    val statistic = if (!config.build) Some(new BranchPredictorUnitStatistic()) else None
  })

  // One ALU per slot that is actually wired in the loop below.
  val alu        = Seq.fill(config.fuNum)(Module(new Alu()))
  val mul        = Module(new Mul()).io
  val div        = Module(new Div()).io
  val hilo       = Module(new HiLo()).io
  val branchCtrl = Module(new BranchCtrl()).io
  val llbit      = Module(new LLbit()).io

  branchCtrl.in.inst_info   := io.inst(0).inst_info
  branchCtrl.in.src_info    := io.inst(0).src_info
  branchCtrl.in.pred_branch := io.branch.pred_branch
  io.branch.branch          := branchCtrl.out.branch
  io.branch.pred_fail       := branchCtrl.out.pred_fail

  for (i <- 0 until (config.fuNum)) {
    alu(i).io.inst_info  := io.inst(i).inst_info
    alu(i).io.src_info   := io.inst(i).src_info
    alu(i).io.hilo.rdata := hilo.rdata
    alu(i).io.mul.result := mul.result
    alu(i).io.mul.ready  := mul.ready
    alu(i).io.div.ready  := div.ready
    alu(i).io.div.result := div.result
    alu(i).io.cp0_rdata  := io.cp0_rdata(i)
    alu(i).io.llbit      := io.llbit
    io.inst(i).ex.out    := io.inst(i).ex.in
    // Arithmetic overflow raises a flush; an exception that is already
    // attached keeps its code.
    io.inst(i).ex.out.flush_req := io.inst(i).ex.in.flush_req || alu(i).io.overflow
    io.inst(i).ex.out.excode := MuxCase(
      io.inst(i).ex.in.excode,
      Seq(
        (io.inst(i).ex.in.excode =/= EX_NO) -> io.inst(i).ex.in.excode,
        alu(i).io.overflow                  -> EX_OV,
      ),
    )
  }

  // The multiplier and divider are shared: slot 0 owns them when it requests
  // the operation, otherwise slot 1's operands are forwarded.
  mul.src1        := Mux(io.inst(0).mul_en, io.inst(0).src_info.src1_data, io.inst(1).src_info.src1_data)
  mul.src2        := Mux(io.inst(0).mul_en, io.inst(0).src_info.src2_data, io.inst(1).src_info.src2_data)
  mul.signed      := Mux(io.inst(0).mul_en, alu(0).io.mul.signed, alu(1).io.mul.signed)
  mul.start       := Mux(io.inst(0).mul_en, alu(0).io.mul.en, alu(1).io.mul.en)
  mul.allow_to_go := io.ctrl.allow_to_go

  div.src1        := Mux(io.inst(0).div_en, io.inst(0).src_info.src1_data, io.inst(1).src_info.src1_data)
  div.src2        := Mux(io.inst(0).div_en, io.inst(0).src_info.src2_data, io.inst(1).src_info.src2_data)
  div.signed      := Mux(io.inst(0).div_en, alu(0).io.div.signed, alu(1).io.div.signed)
  div.start       := Mux(io.inst(0).div_en, alu(0).io.div.en, alu(1).io.div.en)
  div.allow_to_go := io.ctrl.allow_to_go

  io.stall_req := (io.inst.map(_.div_en).reduce(_ || _) && !div.ready) ||
    (io.inst.map(_.mul_en).reduce(_ || _) && !mul.ready)

  // Linking branches in slot 0 write back pc + 8 instead of the ALU result.
  io.inst(0).result := Mux(
    io.inst(0).inst_info.branch_link,
    io.inst(0).pc + 8.U,
    alu(0).io.result,
  )
  io.inst(1).result := alu(1).io.result

  // Slot 1 may write HI/LO only if no slot requests a flush; slot 0 only
  // needs to be flush-free itself.
  val hiloWriteSlot1 = io.inst(1).hilo_wen && !io.inst.map(_.ex.out.flush_req).reduce(_ || _)
  val hiloWriteSlot0 = io.inst(0).hilo_wen && !io.inst(0).ex.out.flush_req
  hilo.wen := (hiloWriteSlot1 || hiloWriteSlot0) && io.ctrl.allow_to_go && !io.ctrl.do_flush
  // Select slot 1's data only when slot 1 is the slot really writing, so a
  // flushed slot 1 cannot supply the data for a write enabled by slot 0.
  hilo.wdata := Mux(hiloWriteSlot1, alu(1).io.hilo.wdata, alu(0).io.hilo.wdata)

  llbit.do_flush := io.ctrl.eret
  llbit.wen := (io.inst(0).inst_info.op === EXE_LL || io.inst(0).inst_info.op === EXE_SC ||
    io.inst(1).inst_info.op === EXE_LL || io.inst(1).inst_info.op === EXE_SC) && io.ctrl.allow_to_go
  // LL sets the bit; SC (the other write source) clears it.
  llbit.wdata := io.inst(0).inst_info.op === EXE_LL || io.inst(1).inst_info.op === EXE_LL
  // Outside of build mode the LL bit is reported as constantly set.
  val llbit_rdata = if (config.build) llbit.rdata else true.B
  io.llbit := llbit_rdata

  // ===----------------------------------------------------------------===
  // statistic
  // ===----------------------------------------------------------------===
  if (!config.build) {
    // NOTE(review): both counters advance on every clock cycle in which their
    // condition holds; neither is qualified by allow_to_go or by slot 0
    // holding a branch. Confirm that this is the intended statistic.
    val branch_cnt  = RegInit(0.U(32.W))
    val success_cnt = RegInit(0.U(32.W))
    when(io.branch.branch) { branch_cnt := branch_cnt + 1.U }
    when(!io.branch.pred_fail) { success_cnt := success_cnt + 1.U }
    io.statistic.get.branch  := branch_cnt
    io.statistic.get.success := success_cnt
  }
}

// ---------------------------------------------------------------------------
// chisel/playground/src/pipeline/execute/HiLo.scala
// ---------------------------------------------------------------------------
package cpu.pipeline.execute

import chisel3._
import chisel3.util._
import cpu.defines.Const._

import cpu.defines._

/** `HILO_WID`-bit register: reset to 0, loaded from `wdata` when `wen` is
  * high, always visible on `rdata`.
  */
class HiLo extends Module {
  val io = IO(new Bundle {
    val wen   = Input(Bool())
    val wdata = Input(UInt(HILO_WID.W))
    val rdata = Output(UInt(HILO_WID.W))
  })
  // output
  val hilo = RegInit(0.U(HILO_WID.W))

  when(io.wen) {
    hilo := io.wdata
  }

  io.rdata := hilo
}

// ---------------------------------------------------------------------------
// chisel/playground/src/pipeline/execute/LLbit.scala
// ---------------------------------------------------------------------------
b/chisel/playground/src/pipeline/execute/LLbit.scala @@ -0,0 +1,23 @@ +package cpu.pipeline.execute + +import chisel3._ +import chisel3.util._ + +class LLbit extends Module { + val io = IO(new Bundle { + val do_flush = Input(Bool()) + val wen = Input(Bool()) + val wdata = Input(Bool()) + + val rdata = Output(Bool()) + }) + val llbit = RegInit(false.B) + + when(io.do_flush) { + llbit := false.B + }.elsewhen(io.wen) { + llbit := io.wdata + } + + io.rdata := llbit +} diff --git a/chisel/playground/src/pipeline/execute/Mul.scala b/chisel/playground/src/pipeline/execute/Mul.scala new file mode 100644 index 0000000..1750a2c --- /dev/null +++ b/chisel/playground/src/pipeline/execute/Mul.scala @@ -0,0 +1,225 @@ +package cpu.pipeline.execute + +import chisel3._ +import chisel3.util._ +import cpu.defines._ +import cpu.defines.Const._ +import cpu.CpuConfig + +class SignedMul extends BlackBox with HasBlackBoxResource { + val io = IO(new Bundle { + val CLK = Input(Clock()) + val CE = Input(Bool()) + val A = Input(UInt((DATA_WID + 1).W)) + val B = Input(UInt((DATA_WID + 1).W)) + + val P = Output(UInt((HILO_WID + 2).W)) + }) +} + +class Mul(implicit val config: CpuConfig) extends Module { + val io = IO(new Bundle { + val src1 = Input(UInt(DATA_WID.W)) + val src2 = Input(UInt(DATA_WID.W)) + val signed = Input(Bool()) + val start = Input(Bool()) + val allow_to_go = Input(Bool()) + + val ready = Output(Bool()) + val result = Output(UInt(HILO_WID.W)) + }) + + if (config.build) { + val signedMul = Module(new SignedMul()).io + val cnt = RegInit(0.U(log2Ceil(config.mulClockNum + 1).W)) + + cnt := MuxCase( + cnt, + Seq( + (io.start && !io.ready) -> (cnt + 1.U), + io.allow_to_go -> 0.U, + ), + ) + + signedMul.CLK := clock + signedMul.CE := io.start + when(io.signed) { + signedMul.A := Cat(io.src1(DATA_WID - 1), io.src1) + signedMul.B := Cat(io.src2(DATA_WID - 1), io.src2) + }.otherwise { + signedMul.A := Cat(0.U(1.W), io.src1) + signedMul.B := Cat(0.U(1.W), io.src2) + } + io.ready := cnt 
>= config.mulClockNum.U + io.result := signedMul.P(HILO_WID - 1, 0) + } else { + val cnt = RegInit(0.U(log2Ceil(config.mulClockNum + 1).W)) + cnt := MuxCase( + cnt, + Seq( + (io.start && !io.ready) -> (cnt + 1.U), + io.allow_to_go -> 0.U, + ), + ) + + val signed = RegInit(0.U(HILO_WID.W)) + val unsigned = RegInit(0.U(HILO_WID.W)) + when(io.start) { + signed := (io.src1.asSInt * io.src2.asSInt).asUInt + unsigned := io.src1 * io.src2 + } + io.result := Mux(io.signed, signed, unsigned) + io.ready := cnt >= config.mulClockNum.U + } +} + +// class ArrayMulDataModule(len: Int) extends Module { +// val io = IO(new Bundle() { +// val a, b = Input(UInt(len.W)) +// val regEnables = Input(Vec(2, Bool())) +// val result = Output(UInt((2 * len).W)) +// }) +// val (a, b) = (io.a, io.b) + +// val b_sext, bx2, neg_b, neg_bx2 = Wire(UInt((len + 1).W)) +// b_sext := SignExt(b, len + 1) +// bx2 := b_sext << 1 +// neg_b := (~b_sext).asUInt() +// neg_bx2 := neg_b << 1 + +// val columns: Array[Seq[Bool]] = Array.fill(2 * len)(Seq()) + +// var last_x = WireInit(0.U(3.W)) +// for (i <- Range(0, len, 2)) { +// val x = if (i == 0) Cat(a(1, 0), 0.U(1.W)) else if (i + 1 == len) SignExt(a(i, i - 1), 3) else a(i + 1, i - 1) +// val pp_temp = MuxLookup( +// x, +// 0.U, +// Seq( +// 1.U -> b_sext, +// 2.U -> b_sext, +// 3.U -> bx2, +// 4.U -> neg_bx2, +// 5.U -> neg_b, +// 6.U -> neg_b, +// ), +// ) +// val s = pp_temp(len) +// val t = MuxLookup( +// last_x, +// 0.U(2.W), +// Seq( +// 4.U -> 2.U(2.W), +// 5.U -> 1.U(2.W), +// 6.U -> 1.U(2.W), +// ), +// ) +// last_x = x +// val (pp, weight) = i match { +// case 0 => +// (Cat(~s, s, s, pp_temp), 0) +// case n if (n == len - 1) || (n == len - 2) => +// (Cat(~s, pp_temp, t), i - 2) +// case _ => +// (Cat(1.U(1.W), ~s, pp_temp, t), i - 2) +// } +// for (j <- columns.indices) { +// if (j >= weight && j < (weight + pp.getWidth)) { +// columns(j) = columns(j) :+ pp(j - weight) +// } +// } +// } + +// def addOneColumn(col: Seq[Bool], cin: Seq[Bool]): 
(Seq[Bool], Seq[Bool], Seq[Bool]) = { +// var sum = Seq[Bool]() +// var cout1 = Seq[Bool]() +// var cout2 = Seq[Bool]() +// col.size match { +// case 1 => // do nothing +// sum = col ++ cin +// case 2 => +// val c22 = Module(new C22) +// c22.io.in := col +// sum = c22.io.out(0).asBool() +: cin +// cout2 = Seq(c22.io.out(1).asBool()) +// case 3 => +// val c32 = Module(new C32) +// c32.io.in := col +// sum = c32.io.out(0).asBool() +: cin +// cout2 = Seq(c32.io.out(1).asBool()) +// case 4 => +// val c53 = Module(new C53) +// for ((x, y) <- c53.io.in.take(4) zip col) { +// x := y +// } +// c53.io.in.last := (if (cin.nonEmpty) cin.head else 0.U) +// sum = Seq(c53.io.out(0).asBool()) ++ (if (cin.nonEmpty) cin.drop(1) else Nil) +// cout1 = Seq(c53.io.out(1).asBool()) +// cout2 = Seq(c53.io.out(2).asBool()) +// case n => +// val cin_1 = if (cin.nonEmpty) Seq(cin.head) else Nil +// val cin_2 = if (cin.nonEmpty) cin.drop(1) else Nil +// val (s_1, c_1_1, c_1_2) = addOneColumn(col take 4, cin_1) +// val (s_2, c_2_1, c_2_2) = addOneColumn(col drop 4, cin_2) +// sum = s_1 ++ s_2 +// cout1 = c_1_1 ++ c_2_1 +// cout2 = c_1_2 ++ c_2_2 +// } +// (sum, cout1, cout2) +// } + +// def max(in: Iterable[Int]): Int = in.reduce((a, b) => if (a > b) a else b) +// def addAll(cols: Array[Seq[Bool]], depth: Int): (UInt, UInt) = { +// if (max(cols.map(_.size)) <= 2) { +// val sum = Cat(cols.map(_(0)).reverse) +// var k = 0 +// while (cols(k).size == 1) k = k + 1 +// val carry = Cat(cols.drop(k).map(_(1)).reverse) +// (sum, Cat(carry, 0.U(k.W))) +// } else { +// val columns_next = Array.fill(2 * len)(Seq[Bool]()) +// var cout1, cout2 = Seq[Bool]() +// for (i <- cols.indices) { +// val (s, c1, c2) = addOneColumn(cols(i), cout1) +// columns_next(i) = s ++ cout2 +// cout1 = c1 +// cout2 = c2 +// } + +// val needReg = depth == 4 +// val toNextLayer = +// if (needReg) +// columns_next.map(_.map(x => RegEnable(x, io.regEnables(1)))) +// else +// columns_next + +// addAll(toNextLayer, depth + 1) +// } 
+// } + +// val columns_reg = columns.map(col => col.map(b => RegEnable(b, io.regEnables(0)))) +// val (sum, carry) = addAll(cols = columns_reg, depth = 0) + +// io.result := sum + carry +// } + +// class ArrayMultiplier(len: Int) extends Module { +// override def latency = 2 + +// val mulDataModule = Module(new ArrayMulDataModule(len)) +// mulDataModule.io.a := io.in.bits.src(0) +// mulDataModule.io.b := io.in.bits.src(1) +// mulDataModule.io.regEnables := VecInit((1 to latency) map (i => regEnable(i))) +// val result = mulDataModule.io.result + +// var ctrlVec = Seq(ctrl) +// for (i <- 1 to latency) { +// ctrlVec = ctrlVec :+ PipelineReg(i)(ctrlVec(i - 1)) +// } +// val 32 = len - 1 +// val res = Mux(ctrlVec.last.isHi, result(2 * 32 - 1, 32), result(32 - 1, 0)) + +// io.out.bits.data := Mux(ctrlVec.last.isW, SignExt(res(31, 0), 32), res) + +// XSDebug(p"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n") +// } diff --git a/chisel/playground/src/pipeline/fetch/BranchPredictorUnit.scala b/chisel/playground/src/pipeline/fetch/BranchPredictorUnit.scala new file mode 100644 index 0000000..32ccb75 --- /dev/null +++ b/chisel/playground/src/pipeline/fetch/BranchPredictorUnit.scala @@ -0,0 +1,199 @@ +package cpu.pipeline.fetch + +import chisel3._ +import chisel3.util._ +import cpu.defines.Const._ +import cpu._ +import cpu.pipeline.decoder.Src12Read + +class ExecuteUnitBranchPredictor extends Bundle { + val bpuConfig = new BranchPredictorConfig() + val pc = Output(UInt(DATA_ADDR_WID.W)) + val update_pht_index = Output(UInt(bpuConfig.phtDepth.W)) + val branch_inst = Output(Bool()) + val branch = Output(Bool()) +} + +class BranchPredictorIO(implicit config: CpuConfig) extends Bundle { + val bpuConfig = new BranchPredictorConfig() + val decoder = new Bundle { + val inst = Input(UInt(INST_WID.W)) + val op = Input(UInt(OP_WID.W)) + val ena = Input(Bool()) + val pc = Input(UInt(DATA_ADDR_WID.W)) + val pc_plus4 = Input(UInt(DATA_ADDR_WID.W)) + val pht_index = 
Input(UInt(bpuConfig.phtDepth.W)) + + val rs1 = Input(UInt(REG_ADDR_WID.W)) + val rs2 = Input(UInt(REG_ADDR_WID.W)) + + val branch_inst = Output(Bool()) + val pred_branch = Output(Bool()) + val branch_target = Output(UInt(DATA_ADDR_WID.W)) + val update_pht_index = Output(UInt(bpuConfig.phtDepth.W)) + } + + val instBuffer = new Bundle { + val pc = Input(Vec(config.instFetchNum, UInt(PC_WID.W))) + val pht_index = Output(Vec(config.instFetchNum, UInt(bpuConfig.phtDepth.W))) + } + + val execute = Flipped(new ExecuteUnitBranchPredictor()) + + val regfile = if (config.branchPredictor == "pesudo") Some(new Src12Read()) else None +} + +class BranchPredictorUnit(implicit config: CpuConfig) extends Module { + val io = IO(new BranchPredictorIO()) + + if (config.branchPredictor == "adaptive") { + val adaptive_predictor = Module(new AdaptiveTwoLevelPredictor()) + io <> adaptive_predictor.io + } + + if (config.branchPredictor == "pesudo") { + val pesudo_predictor = Module(new PesudoBranchPredictor()) + io <> pesudo_predictor.io + } + + if (config.branchPredictor == "global") { + val global_predictor = Module(new GlobalBranchPredictor()) + io <> global_predictor.io + } +} + +class PesudoBranchPredictor(implicit config: CpuConfig) extends Module { + val io = IO(new BranchPredictorIO()) + io.decoder.branch_inst := VecInit(EXE_BEQ, EXE_BNE, EXE_BGTZ, EXE_BLEZ, EXE_BGEZ, EXE_BGEZAL, EXE_BLTZ, EXE_BLTZAL) + .contains(io.decoder.op) + io.decoder.branch_target := io.decoder.pc_plus4 + Cat( + Fill(14, io.decoder.inst(15)), + io.decoder.inst(15, 0), + 0.U(2.W) + ) + + io.regfile.get.src1.raddr := io.decoder.rs1 + io.regfile.get.src2.raddr := io.decoder.rs2 + val (src1, src2) = (io.regfile.get.src1.rdata, io.regfile.get.src2.rdata) + val pred_branch = MuxLookup(io.decoder.op, false.B)( + Seq( + EXE_BEQ -> (src1 === src2), + EXE_BNE -> (src1 =/= src2), + EXE_BGTZ -> (!src1(31) && (src1 =/= 0.U)), + EXE_BLEZ -> (src1(31) || src1 === 0.U), + EXE_BGEZ -> (!src1(31)), + EXE_BGEZAL -> 
(!src1(31)),
+        EXE_BLTZ -> (src1(31)),
+        EXE_BLTZAL -> (src1(31))
+      )
+    )
+
+  io.decoder.pred_branch := io.decoder.ena && io.decoder.branch_inst && pred_branch
+}
+// Two-level predictor: a BHT entry selected by PC bits supplies the index of a 2-bit counter in the PHT.
+class GlobalBranchPredictor(
+    GHR_DEPTH: Int = 4, // number of history records kept (parameter is not referenced in this class body)
+    PC_HASH_WID: Int = 4, // width of the PC slice taken (parameter is not referenced in this class body)
+    PHT_DEPTH: Int = 6, // history bits per BHT entry; the PHT holds 1 << PHT_DEPTH counters
+    BHT_DEPTH: Int = 4 // number of PC bits (starting at bit 2) used to index the BHT
+)(
+  implicit
+  config: CpuConfig)
+    extends Module {
+  val io = IO(new BranchPredictorIO())
+  // the four states of the 2-bit saturating counter
+  val strongly_not_taken :: weakly_not_taken :: weakly_taken :: strongly_taken :: Nil = Enum(4)
+  // only these conditional-branch opcodes are reported as branch instructions
+  io.decoder.branch_inst := VecInit(EXE_BEQ, EXE_BNE, EXE_BGTZ, EXE_BLEZ, EXE_BGEZ, EXE_BGEZAL, EXE_BLTZ, EXE_BLTZAL)
+    .contains(io.decoder.op)
+  io.decoder.branch_target := io.decoder.pc_plus4 + Cat( // pc_plus4 + (sign-extended 16-bit immediate << 2)
+    Fill(14, io.decoder.inst(15)),
+    io.decoder.inst(15, 0),
+    0.U(2.W)
+  )
+  // local prediction mode
+
+  val bht = RegInit(VecInit(Seq.fill(1 << BHT_DEPTH)(0.U(PHT_DEPTH.W)))) // per-index history shift registers, reset to 0
+  val pht = RegInit(VecInit(Seq.fill(1 << PHT_DEPTH)(strongly_taken))) // counters, all reset to strongly_taken
+  val bht_index = io.decoder.pc(1 + BHT_DEPTH, 2)
+  val pht_index = bht(bht_index)
+  // predict taken only for an enabled branch whose counter is in one of the two taken states
+  io.decoder.pred_branch :=
+    io.decoder.ena && io.decoder.branch_inst && (pht(pht_index) === weakly_taken || pht(pht_index) === strongly_taken)
+  val update_bht_index = io.execute.pc(1 + BHT_DEPTH, 2)
+  val update_pht_index = bht(update_bht_index) // NOTE(review): history as stored now, which may differ from the one used at prediction time
+
+  when(io.execute.branch_inst) {
+    bht(update_bht_index) := Cat(bht(update_bht_index)(PHT_DEPTH - 2, 0), io.execute.branch) // shift the outcome into the history
+    switch(pht(update_pht_index)) { // saturating step towards the observed outcome
+      is(strongly_not_taken) {
+        pht(update_pht_index) := Mux(io.execute.branch, weakly_not_taken, strongly_not_taken)
+      }
+      is(weakly_not_taken) {
+        pht(update_pht_index) := Mux(io.execute.branch, weakly_taken, strongly_not_taken)
+      }
+      is(weakly_taken) {
+        pht(update_pht_index) := Mux(io.execute.branch, strongly_taken, weakly_not_taken)
+      }
+      is(strongly_taken) {
+        pht(update_pht_index) := Mux(io.execute.branch, strongly_taken, weakly_taken)
+      }
+    }
+  }
+
+}
+// Same BHT/PHT scheme, but the PHT index is read when the instruction enters the buffer and travels with it.
+class AdaptiveTwoLevelPredictor(
+)(
+  implicit
+  config: CpuConfig)
+    extends Module {
+  val bpuConfig = new BranchPredictorConfig()
+  val PHT_DEPTH = bpuConfig.phtDepth // history bits per BHT entry
+  val BHT_DEPTH = bpuConfig.bhtDepth // PC bits used to index the BHT
+  val io = IO(new BranchPredictorIO())
+  // the four states of the 2-bit saturating counter
+  val strongly_not_taken :: weakly_not_taken :: weakly_taken :: strongly_taken :: Nil = Enum(4)
+
+  io.decoder.branch_inst :=
+    VecInit(EXE_BEQ, EXE_BNE, EXE_BGTZ, EXE_BLEZ, EXE_BGEZ, EXE_BGEZAL, EXE_BLTZ, EXE_BLTZAL).contains(io.decoder.op)
+  io.decoder.branch_target := io.decoder.pc_plus4 + Cat( // pc_plus4 + (sign-extended 16-bit immediate << 2)
+    Fill(14, io.decoder.inst(15)),
+    io.decoder.inst(15, 0),
+    0.U(2.W)
+  )
+
+  val bht = RegInit(VecInit(Seq.fill(1 << BHT_DEPTH)(0.U(PHT_DEPTH.W))))
+  val pht = RegInit(VecInit(Seq.fill(1 << PHT_DEPTH)(strongly_taken)))
+  val pht_index = io.decoder.pht_index // index supplied by the decoder rather than re-read from bht
+  // hand each fetched instruction the history currently stored for its PC
+  for (i <- 0 until config.instFetchNum) {
+    io.instBuffer.pht_index(i) := bht(io.instBuffer.pc(i)(1 + BHT_DEPTH, 2))
+  }
+
+  io.decoder.pred_branch :=
+    io.decoder.ena && io.decoder.branch_inst && (pht(pht_index) === weakly_taken || pht(pht_index) === strongly_taken)
+  io.decoder.update_pht_index := bht(io.decoder.pc(1 + BHT_DEPTH, 2)) // history read at decode time, output for the later update
+
+  val update_bht_index = io.execute.pc(1 + BHT_DEPTH, 2)
+  val update_pht_index = io.execute.update_pht_index
+
+  when(io.execute.branch_inst) {
+    bht(update_bht_index) := Cat(bht(update_bht_index)(PHT_DEPTH - 2, 0), io.execute.branch) // shift the outcome into the history
+    switch(pht(update_pht_index)) { // saturating step towards the observed outcome
+      is(strongly_not_taken) {
+        pht(update_pht_index) := Mux(io.execute.branch, weakly_not_taken, strongly_not_taken)
+      }
+      is(weakly_not_taken) {
+        pht(update_pht_index) := Mux(io.execute.branch, weakly_taken, strongly_not_taken)
+      }
+      is(weakly_taken) {
+        pht(update_pht_index) := Mux(io.execute.branch, strongly_taken, weakly_not_taken)
+      }
+      is(strongly_taken) {
+        pht(update_pht_index) := Mux(io.execute.branch, strongly_taken, weakly_taken)
+      }
+    }
+  }
+
+}
diff --git a/chisel/playground/src/pipeline/fetch/FetchUnit.scala b/chisel/playground/src/pipeline/fetch/FetchUnit.scala
new file mode 100644
index 0000000..6b48e3a
--- /dev/null
+++ 
b/chisel/playground/src/pipeline/fetch/FetchUnit.scala
@@ -0,0 +1,57 @@
+package cpu.pipeline.fetch
+
+import chisel3._
+import chisel3.util._
+import cpu.defines.Const._
+import cpu.CpuConfig
+// PC unit: holds the fetch PC and picks the next PC from flush, branch redirects, FIFO back-pressure or sequential advance.
+class FetchUnit(implicit
+    val config: CpuConfig,
+) extends Module {
+  val io = IO(new Bundle {
+    val memory = new Bundle {
+      val flush = Input(Bool())
+      val flush_pc = Input(UInt(PC_WID.W))
+    }
+    val decoder = new Bundle {
+      val branch = Input(Bool())
+      val target = Input(UInt(PC_WID.W))
+    }
+    val execute = new Bundle {
+      val branch = Input(Bool())
+      val target = Input(UInt(PC_WID.W))
+    }
+    val instFifo = new Bundle {
+      val full = Input(Bool())
+    }
+    val iCache = new Bundle {
+      val inst_valid = Input(Vec(config.instFetchNum, Bool()))
+      val pc = Output(UInt(PC_WID.W))
+      val pc_next = Output(UInt(PC_WID.W))
+    }
+
+  })
+  val pc = RegNext(io.iCache.pc_next, "h_bfc00000".U(32.W)) // PC register; reset value 0xbfc00000 (NOTE(review): literal is 32 bits wide, ports use PC_WID)
+  io.iCache.pc := pc
+
+  // when inst_valid(1) is true, inst_valid(0) must be true
+
+  val pc_next_temp = Wire(UInt(PC_WID.W))
+  // sequential next PC: by last-connect, the highest valid slot i yields pc + 4 * (i + 1); no valid slot keeps pc
+  pc_next_temp := pc
+  for (i <- 0 until config.instFetchNum) {
+    when(io.iCache.inst_valid(i)) {
+      pc_next_temp := pc + ((i + 1) * 4).U
+    }
+  }
+  // MuxCase takes the first true condition: memory flush, execute branch, decoder branch, then hold on FIFO full
+  io.iCache.pc_next := MuxCase(
+    pc_next_temp,
+    Seq(
+      io.memory.flush -> io.memory.flush_pc,
+      io.execute.branch -> io.execute.target,
+      io.decoder.branch -> io.decoder.target,
+      io.instFifo.full -> pc,
+    ),
+  )
+}
diff --git a/chisel/playground/src/pipeline/fetch/InstFifo.scala b/chisel/playground/src/pipeline/fetch/InstFifo.scala
new file mode 100644
index 0000000..546fbda
--- /dev/null
+++ b/chisel/playground/src/pipeline/fetch/InstFifo.scala
@@ -0,0 +1,141 @@
+package cpu.pipeline.fetch
+
+import chisel3._
+import chisel3.util._
+import cpu.{CpuConfig, BranchPredictorConfig}
+// One instruction-FIFO entry: instruction word, its PC, TLB refill/invalid flags and a PHT index.
+class BufferUnit extends Bundle {
+  val bpuConfig = new BranchPredictorConfig() // only used to size pht_index
+  val tlb = new Bundle {
+    val refill = Bool()
+    val invalid = Bool()
+  }
+  val inst = UInt(32.W)
+  val pht_index = UInt(bpuConfig.phtDepth.W)
+  val pc = UInt(32.W)
+}
+
+class InstFifo(implicit val config: CpuConfig) extends Module {
+  val io = IO(new Bundle {
+    val do_flush = Input(Bool())
+    val flush_delay_slot = Input(Bool())
+    val delay_sel_flush = Input(Bool())
+    val decoder_delay_flush = Input(Bool())
+    val execute_delay_flush = Input(Bool())
+    val icache_stall = Input(Bool())
+    val jump_branch_inst = Input(Bool()) // whether inst0 in the decode stage is a jump/branch instruction
+    val inst0_is_in_delayslot = Output(Bool())
+
+    val ren = Input(Vec(config.decoderNum, Bool()))
+    val read = Output(Vec(config.decoderNum, new BufferUnit()))
+
+    val wen = Input(Vec(config.instFetchNum, Bool()))
+    val write = Input(Vec(config.instFetchNum, new BufferUnit()))
+
+    val empty = Output(Bool())
+    val almost_empty = Output(Bool())
+    val full = Output(Bool())
+  })
+  // fifo buffer
+  val buffer = RegInit(VecInit(Seq.fill(config.instFifoDepth)(0.U.asTypeOf(new BufferUnit()))))
+
+  // fifo ptr
+  val enq_ptr = RegInit(0.U(log2Ceil(config.instFifoDepth).W))
+  val deq_ptr = RegInit(0.U(log2Ceil(config.instFifoDepth).W))
+  val count = RegInit(0.U(log2Ceil(config.instFifoDepth).W)) // occupancy; NOTE(review): this width cannot represent instFifoDepth itself
+
+  // config.instFifoDepth - 1 is the last element, config.instFifoDepth - 2 is the last second element
+  // the second last element's valid decide whether the fifo is full
+  io.full := count >= (config.instFifoDepth - config.instFetchNum).U // TODO: the ">=" comparison here can still be optimized
+  io.empty := count === 0.U
+  io.almost_empty := count === 1.U
+  // set when inst0 is a jump/branch and slot 1 is not read in the same cycle; held while ren(0) is low
+  val inst0_is_in_delayslot = RegInit(false.B)
+  io.inst0_is_in_delayslot := inst0_is_in_delayslot
+  inst0_is_in_delayslot := MuxCase(
+    false.B,
+    Seq(
+      io.flush_delay_slot -> false.B,
+      !io.ren(0) -> inst0_is_in_delayslot,
+      (io.jump_branch_inst && !io.ren(1)) -> true.B,
+    ),
+  )
+  // delay-slot preservation across a flush: one entry is latched in delayslot_line and served ahead of the FIFO
+  val delayslot_stall = RegInit(false.B)
+  val delayslot_enable = RegInit(false.B)
+  val delayslot_line = RegInit(0.U.asTypeOf(new BufferUnit()))
+  when(io.do_flush && io.delay_sel_flush && !io.flush_delay_slot && io.icache_stall && (io.empty || io.almost_empty)) {
+    delayslot_stall := true.B
+  }.elsewhen(delayslot_stall && io.wen(0)) {
+    delayslot_stall := false.B
+  }
+
+  when(io.do_flush && !io.flush_delay_slot && io.delay_sel_flush) {
+    when(io.execute_delay_flush) {
+      delayslot_enable := true.B
+      delayslot_line := Mux(io.empty, io.write(0), buffer(deq_ptr)) // take the incoming word when nothing is buffered
+    }.elsewhen(io.decoder_delay_flush) {
+      delayslot_enable := true.B
+      delayslot_line := Mux(io.almost_empty, io.write(0), buffer(deq_ptr + 1.U))
+    }.otherwise {
+      delayslot_enable := false.B
+    }
+  }.elsewhen(!delayslot_stall && io.ren(0)) {
+    delayslot_enable := false.B
+  }
+
+  // * deq * //
+  io.read(0) := MuxCase(
+    buffer(deq_ptr),
+    Seq(
+      delayslot_enable -> delayslot_line,
+      io.empty -> 0.U.asTypeOf(new BufferUnit()),
+      io.almost_empty -> buffer(deq_ptr), // same value as the default case
+    ),
+  )
+  io.read(1) := MuxCase(
+    buffer(deq_ptr + 1.U),
+    Seq(
+      (delayslot_enable || io.empty || io.almost_empty) -> 0.U.asTypeOf(new BufferUnit()),
+    ),
+  )
+  // entries consumed this cycle: none while empty or serving the latched delay slot, else 2 if ren(1), else 1 if ren(0)
+  val deq_num = MuxCase(
+    0.U,
+    Seq(
+      (io.empty || delayslot_enable) -> 0.U,
+      io.ren(1) -> 2.U,
+      io.ren(0) -> 1.U,
+    ),
+  )
+
+  when(io.do_flush) {
+    deq_ptr := 0.U
+  }.otherwise {
+    deq_ptr := deq_ptr + deq_num
+  }
+
+  // * enq * //
+  val enq_num = Wire(UInt(log2Ceil(config.instFetchNum + 1).W))
+
+  for (i <- 0 until config.instFetchNum) {
+    when(io.wen(i)) {
+      buffer(enq_ptr + i.U) := io.write(i)
+    }
+  }
+
+  when(io.do_flush) {
+    enq_ptr := 0.U
+  }.otherwise {
+    enq_ptr := enq_ptr + enq_num
+  }
+  // by last-connect, enq_num becomes (highest asserted wen index) + 1
+  enq_num := 0.U
+  for (i <- 0 until config.instFetchNum) {
+    when(io.wen(i)) {
+      enq_num := (i + 1).U
+    }
+  }
+  // instFifoDepth is added before subtracting deq_num; the sum is truncated to count's width on assignment
+  count := Mux(io.do_flush, 0.U, count + enq_num + config.instFifoDepth.U - deq_num)
+}
diff --git a/chisel/playground/src/pipeline/fetch/PreDecoder.scala b/chisel/playground/src/pipeline/fetch/PreDecoder.scala
new file mode 100644
index 0000000..cefd89a
--- /dev/null
+++ b/chisel/playground/src/pipeline/fetch/PreDecoder.scala
@@ -0,0 +1,94 @@
+package cpu.pipeline.fetch
+
+import chisel3._
+import chisel3.util._
+import cpu.defines.Const._
+import cpu.CpuConfig
+import
cpu.pipeline.fetch.BufferUnit
+// Pre-decoded fetch slot: the raw instruction plus the flags derived from it by PreDecoder.
+class BufferEnq extends Bundle {
+  val valid = Bool()
+  val jump_branch_inst = Bool()
+  val op = UInt(OP_WID.W)
+  val is_in_delayslot = Bool()
+
+  val tlb = new Bundle {
+    val refill = Bool()
+    val invalid = Bool()
+  }
+  val inst = UInt(32.W)
+  val pc = UInt(32.W)
+}
+// Registers one fetch group and classifies every slot as jump/branch or not before it is handed on.
+class PreDecoder(implicit val config: CpuConfig) extends Module {
+  val io = IO(new Bundle {
+    val flush = Input(Bool())
+
+    val full = new Bundle {
+      val fromInstFifo = Input(Bool())
+      val toIcache = Output(Bool())
+    }
+    val read = Output(Vec(config.instFetchNum, new BufferEnq()))
+
+    val wen = Input(Vec(config.instFetchNum, Bool()))
+    val write = Input(Vec(config.instFetchNum, new BufferUnit()))
+  })
+  // only valid/tlb/inst/pc of this register are ever written; op, jump_branch_inst and is_in_delayslot stay at reset
+  val buffer = RegInit(VecInit(Seq.fill(config.instFetchNum)(0.U.asTypeOf(new BufferEnq()))))
+  // capture a new group unless the instruction FIFO is full; valid follows wen in that case
+  for (i <- 0 until config.instFetchNum) {
+    when(io.wen(i) && !io.full.fromInstFifo) {
+      buffer(i).tlb.refill := io.write(i).tlb.refill
+      buffer(i).tlb.invalid := io.write(i).tlb.invalid
+      buffer(i).inst := io.write(i).inst
+      buffer(i).pc := io.write(i).pc
+    }
+    when(!io.full.fromInstFifo) {
+      buffer(i).valid := io.wen(i)
+    }
+  }
+  io.full.toIcache := io.full.fromInstFifo
+  // combinational decode of each buffered instruction; anything not listed is reported as EXE_NOP / not a jump
+  for (i <- 0 until config.instFetchNum) {
+    val signals: List[UInt] = ListLookup(
+      buffer(i).inst,
+      List(EXE_NOP, false.B),
+      Array( // jump and branch instructions
+        J -> List(EXE_J, true.B),
+        JAL -> List(EXE_JAL, true.B),
+        JR -> List(EXE_JR, true.B),
+        JALR -> List(EXE_JALR, true.B),
+        BEQ -> List(EXE_BEQ, true.B),
+        BNE -> List(EXE_BNE, true.B),
+        BGTZ -> List(EXE_BGTZ, true.B),
+        BLEZ -> List(EXE_BLEZ, true.B),
+        BGEZ -> List(EXE_BGEZ, true.B),
+        BGEZAL -> List(EXE_BGEZAL, true.B),
+        BLTZ -> List(EXE_BLTZ, true.B),
+        BLTZAL -> List(EXE_BLTZAL, true.B),
+      ),
+    )
+    val op :: jump_branch_inst :: Nil = signals
+
+    io.read(i).tlb.refill := buffer(i).tlb.refill
+    io.read(i).tlb.invalid := buffer(i).tlb.invalid
+    io.read(i).inst := buffer(i).inst
+    io.read(i).pc := buffer(i).pc
+    io.read(i).valid := buffer(i).valid
+    io.read(i).jump_branch_inst := jump_branch_inst
+    io.read(i).op := op
+  }
+  // FIX: use the decoded flag; buffer(i).jump_branch_inst is never assigned, so reading it made is_in_delayslot constant false
+  val inst0_is_in_delayslot = RegNext(io.read(config.instFetchNum - 1).jump_branch_inst) // NOTE(review): not gated by valid or by the FIFO-full stall - confirm
+  // slot i is a delay slot when the slot just before it decodes as a jump/branch
+  for (i <- 1 until config.instFetchNum) {
+    io.read(i).is_in_delayslot := io.read(i - 1).jump_branch_inst
+  }
+  io.read(0).is_in_delayslot := inst0_is_in_delayslot
+  // placed last so that a flush overrides the valid update above
+  when(io.flush) {
+    for (i <- 0 until config.instFetchNum) {
+      buffer(i).valid := false.B
+    }
+  }
+}
diff --git a/chisel/playground/src/pipeline/memory/DataMemoryAccess.scala b/chisel/playground/src/pipeline/memory/DataMemoryAccess.scala
new file mode 100644
index 0000000..9648b19
--- /dev/null
+++ b/chisel/playground/src/pipeline/memory/DataMemoryAccess.scala
@@ -0,0 +1,168 @@
+package cpu.pipeline.memory
+
+import chisel3._
+import chisel3.util._
+import cpu.defines._
+import cpu.defines.Const._
+import cpu.CpuConfig
+
+class DataMemoryAccess(implicit val config: CpuConfig) extends Module {
+  val io = IO(new Bundle {
+    val memoryUnit = new Bundle {
+      val in = Input(new Bundle {
+        val mem_en = Bool()
+        val inst_info = new InstInfo()
+        val mem_wdata = UInt(DATA_WID.W)
+        val mem_addr = UInt(DATA_ADDR_WID.W)
+        val mem_sel = Vec(config.fuNum, Bool())
+        val ex = Vec(config.fuNum, new ExceptionInfo())
+        val llbit = Bool()
+      })
+      val out = Output(new Bundle {
+        val rdata = Output(UInt(DATA_WID.W))
+      })
+    }
+
+    val dataMemory = new Bundle {
+      val in = Input(new Bundle {
+        val rdata = UInt(DATA_WID.W)
+      })
+      val out = Output(new Bundle {
+        val en = Bool()
+        val rlen = UInt(2.W)
+        val wen = UInt(4.W)
+        val addr = UInt(DATA_ADDR_WID.W)
+        val wdata = UInt(DATA_WID.W)
+      })
+    }
+  })
+  val mem_addr = io.memoryUnit.in.mem_addr
+  val mem_addr2 = mem_addr(1, 0)
+  val mem_rdata = io.dataMemory.in.rdata
+  val mem_wdata = io.memoryUnit.in.mem_wdata
+  val op = io.memoryUnit.in.inst_info.op
+  io.dataMemory.out.en := io.memoryUnit.in.mem_en &&
+    (io.memoryUnit.in.mem_sel(0) && !io.memoryUnit.in.ex(0).flush_req ||
+      io.memoryUnit.in.mem_sel(1) &&
!io.memoryUnit.in.ex(0).flush_req && !io.memoryUnit.in.ex(1).flush_req) // a slot-1 access is also suppressed when slot 0 requests a flush
+  io.dataMemory.out.addr := mem_addr
+  // load result: pick and extend bytes of mem_rdata according to op and the low two address bits
+  io.memoryUnit.out.rdata := MuxLookup(op, 0.U)(
+    Seq(
+      EXE_LB -> MuxLookup(mem_addr2, 0.U)(
+        Seq(
+          "b11".U -> Util.signedExtend(mem_rdata(31, 24)),
+          "b10".U -> Util.signedExtend(mem_rdata(23, 16)),
+          "b01".U -> Util.signedExtend(mem_rdata(15, 8)),
+          "b00".U -> Util.signedExtend(mem_rdata(7, 0))
+        )
+      ),
+      EXE_LBU -> MuxLookup(mem_addr2, 0.U)(
+        Seq(
+          "b11".U -> Util.zeroExtend(mem_rdata(31, 24)),
+          "b10".U -> Util.zeroExtend(mem_rdata(23, 16)),
+          "b01".U -> Util.zeroExtend(mem_rdata(15, 8)),
+          "b00".U -> Util.zeroExtend(mem_rdata(7, 0))
+        )
+      ),
+      EXE_LH -> Mux(
+        mem_addr2(1),
+        Util.signedExtend(mem_rdata(31, 16)),
+        Util.signedExtend(mem_rdata(15, 0))
+      ),
+      EXE_LHU -> Mux(
+        mem_addr2(1),
+        Util.zeroExtend(mem_rdata(31, 16)),
+        Util.zeroExtend(mem_rdata(15, 0))
+      ),
+      EXE_LW -> mem_rdata,
+      EXE_LL -> mem_rdata,
+      EXE_LWL -> MuxLookup(mem_addr2, 0.U)( // loaded bytes fill the high end, low bytes come from mem_wdata (presumably the old register value - confirm)
+        Seq(
+          "b11".U -> mem_rdata,
+          "b10".U -> Cat(mem_rdata(23, 0), mem_wdata(7, 0)),
+          "b01".U -> Cat(mem_rdata(15, 0), mem_wdata(15, 0)),
+          "b00".U -> Cat(mem_rdata(7, 0), mem_wdata(23, 0))
+        )
+      ),
+      EXE_LWR -> MuxLookup(mem_addr2, 0.U)( // loaded bytes fill the low end, high bytes come from mem_wdata
+        Seq(
+          "b11".U -> Cat(mem_wdata(31, 8), mem_rdata(31, 24)),
+          "b10".U -> Cat(mem_wdata(31, 16), mem_rdata(31, 16)),
+          "b01".U -> Cat(mem_wdata(31, 24), mem_rdata(31, 8)),
+          "b00".U -> mem_rdata
+        )
+      )
+    )
+  )
+  io.dataMemory.out.wdata := MuxLookup(op, mem_wdata)( // default SW, SC
+    Seq(
+      EXE_SB -> Fill(4, mem_wdata(7, 0)), // byte replicated on all four lanes; wen selects the lane
+      EXE_SH -> Fill(2, mem_wdata(15, 0)), // halfword replicated on both halves; wen selects the half
+      EXE_SWL -> MuxLookup(mem_addr2, 0.U)( // high bytes of the register shifted down to the low lanes
+        Seq(
+          "b11".U -> mem_wdata,
+          "b10".U -> Cat(0.U(8.W), mem_wdata(31, 8)),
+          "b01".U -> Cat(0.U(16.W), mem_wdata(31, 16)),
+          "b00".U -> Cat(0.U(24.W), mem_wdata(31, 24))
+        )
+      ),
+      EXE_SWR -> MuxLookup(mem_addr2, 0.U)( // low bytes of the register shifted up to the high lanes
+        Seq(
+          "b11".U -> Cat(mem_wdata(7, 0), 0.U(24.W)),
+          "b10".U -> Cat(mem_wdata(15, 0), 0.U(16.W)),
+          "b01".U -> Cat(mem_wdata(23, 0), 0.U(8.W)),
+          "b00".U -> mem_wdata
+        )
+      )
+    )
+  )
+  io.dataMemory.out.wen := MuxLookup(op, 0.U)( // per-byte write mask; 0 for every op not listed
+    Seq(
+      EXE_SB -> MuxLookup(mem_addr2, 0.U)(
+        Seq(
+          "b11".U -> "b1000".U,
+          "b10".U -> "b0100".U,
+          "b01".U -> "b0010".U,
+          "b00".U -> "b0001".U
+        )
+      ),
+      EXE_SH -> Mux(mem_addr2(1), "b1100".U, "b0011".U),
+      EXE_SW -> "b1111".U,
+      EXE_SC -> Fill(4, io.memoryUnit.in.llbit), // SC writes all four lanes only when llbit is set
+      EXE_SWL -> MuxLookup(mem_addr2, 0.U)(
+        Seq(
+          "b11".U -> "b1111".U,
+          "b10".U -> "b0111".U,
+          "b01".U -> "b0011".U,
+          "b00".U -> "b0001".U
+        )
+      ),
+      EXE_SWR -> MuxLookup(mem_addr2, 0.U)(
+        Seq(
+          "b11".U -> "b1000".U,
+          "b10".U -> "b1100".U,
+          "b01".U -> "b1110".U,
+          "b00".U -> "b1111".U
+        )
+      )
+    )
+  )
+  io.dataMemory.out.rlen := MuxLookup(op, 0.U)( // size code as used in this table: 0 = byte, 1 = halfword, 2 = word
+    Seq(
+      EXE_LW -> 2.U,
+      EXE_LL -> 2.U,
+      EXE_LH -> 1.U,
+      EXE_LHU -> 1.U,
+      EXE_LB -> 0.U,
+      EXE_LBU -> 0.U,
+      EXE_LWL -> 2.U,
+      EXE_LWR -> 2.U,
+      EXE_SW -> 2.U,
+      EXE_SWL -> 2.U,
+      EXE_SWR -> 2.U,
+      EXE_SC -> 2.U,
+      EXE_SH -> 1.U,
+      EXE_SB -> 0.U
+    )
+  )
+}
diff --git a/chisel/playground/src/pipeline/memory/MemoryStage.scala b/chisel/playground/src/pipeline/memory/MemoryStage.scala
new file mode 100644
index 0000000..44bfb01
--- /dev/null
+++ b/chisel/playground/src/pipeline/memory/MemoryStage.scala
@@ -0,0 +1,64 @@
+package cpu.pipeline.memory
+
+import chisel3._
+import chisel3.util._
+import cpu.defines._
+import cpu.defines.Const._
+import cpu.CpuConfig
+// CP0 register values carried down the pipeline next to instruction 0.
+class Cp0Info extends Bundle {
+  val cp0_count = UInt(DATA_WID.W)
+  val cp0_random = UInt(DATA_WID.W)
+  val cp0_cause = UInt(DATA_WID.W)
+}
+// Execute-to-memory payload of the second issue slot.
+class ExeMemInst1 extends Bundle {
+  val pc = UInt(PC_WID.W)
+  val inst_info = new InstInfo()
+  val rd_info = new RdInfo()
+  val ex = new ExceptionInfo()
+}
+// Execute-to-memory payload of the first issue slot; additionally carries the CP0 snapshot and the memory request.
+class ExeMemInst0(implicit val config: CpuConfig) extends ExeMemInst1 {
+  val cp0 = new Cp0Info()
+  val mem = new Bundle {
+    val en = Bool()
+    val ren = Bool()
+    val wen = Bool()
+    val inst_info = new InstInfo()
+    val addr = UInt(DATA_ADDR_WID.W)
+    val wdata = UInt(DATA_WID.W)
+    val sel = 
Vec(config.fuNum, Bool())
+    val llbit = Bool()
+  }
+}
+
+class ExecuteUnitMemoryUnit(implicit val config: CpuConfig) extends Bundle {
+  // payloads of the two issue slots
+  val inst0 = new ExeMemInst0()
+  val inst1 = new ExeMemInst1()
+}
+// Pipeline register between execute and memory: zeroed on clear, loaded on allow_to_go, otherwise held.
+class MemoryStage(implicit val config: CpuConfig) extends Module {
+  val io = IO(new Bundle {
+    val ctrl = Input(new Bundle {
+      val allow_to_go = Bool()
+      val clear = Bool()
+    })
+    val executeUnit = Input(new ExecuteUnitMemoryUnit())
+    val memoryUnit = Output(new ExecuteUnitMemoryUnit())
+  })
+  val inst0 = RegInit(0.U.asTypeOf(new ExeMemInst0()))
+  val inst1 = RegInit(0.U.asTypeOf(new ExeMemInst1()))
+  // clear has priority over allow_to_go
+  when(io.ctrl.clear) {
+    inst0 := 0.U.asTypeOf(new ExeMemInst0())
+    inst1 := 0.U.asTypeOf(new ExeMemInst1())
+  }.elsewhen(io.ctrl.allow_to_go) {
+    inst0 := io.executeUnit.inst0
+    inst1 := io.executeUnit.inst1
+  }
+
+  io.memoryUnit.inst0 := inst0
+  io.memoryUnit.inst1 := inst1
+}
diff --git a/chisel/playground/src/pipeline/memory/MemoryUnit.scala b/chisel/playground/src/pipeline/memory/MemoryUnit.scala
new file mode 100644
index 0000000..0e9150c
--- /dev/null
+++ b/chisel/playground/src/pipeline/memory/MemoryUnit.scala
@@ -0,0 +1,122 @@
+package cpu.pipeline.memory
+
+import chisel3._
+import chisel3.util._
+import cpu.defines._
+import cpu.defines.Const._
+import cpu.CpuConfig
+import cpu.pipeline.decoder.RegWrite
+import cpu.pipeline.execute.Cp0MemoryUnit
+import cpu.pipeline.writeback.MemoryUnitWriteBackUnit
+// Memory stage logic: drives the data-memory port, merges TLB exceptions into both slots and raises fetch flushes.
+class MemoryUnit(implicit val config: CpuConfig) extends Module {
+  val io = IO(new Bundle {
+    val ctrl = new MemoryCtrl()
+    val memoryStage = Input(new ExecuteUnitMemoryUnit())
+    val fetchUnit = Output(new Bundle {
+      val flush = Bool()
+      val flush_pc = UInt(PC_WID.W)
+    })
+    val decoderUnit = Output(Vec(config.fuNum, new RegWrite()))
+    val cp0 = Flipped(new Cp0MemoryUnit())
+    val writeBackStage = Output(new MemoryUnitWriteBackUnit())
+    val dataMemory = new Bundle {
+      val in = Input(new Bundle {
+        val tlb = new Bundle {
+          val invalid = Bool()
+          val refill = Bool()
+          val modify = Bool()
+        }
+        val rdata = UInt(DATA_WID.W)
+      })
+      val out = Output(new Bundle {
+        val en = Bool()
+        val rlen = UInt(2.W)
+        val wen = UInt(4.W)
+        val addr = UInt(DATA_ADDR_WID.W)
+        val wdata = UInt(DATA_WID.W)
+      })
+    }
+  })
+  // the single memory request always comes from inst0.mem; both slots' exception info gates it
+  val dataMemoryAccess = Module(new DataMemoryAccess()).io
+  dataMemoryAccess.memoryUnit.in.mem_en := io.memoryStage.inst0.mem.en
+  dataMemoryAccess.memoryUnit.in.inst_info := io.memoryStage.inst0.mem.inst_info
+  dataMemoryAccess.memoryUnit.in.mem_wdata := io.memoryStage.inst0.mem.wdata
+  dataMemoryAccess.memoryUnit.in.mem_addr := io.memoryStage.inst0.mem.addr
+  dataMemoryAccess.memoryUnit.in.mem_sel := io.memoryStage.inst0.mem.sel
+  dataMemoryAccess.memoryUnit.in.ex(0) := io.memoryStage.inst0.ex
+  dataMemoryAccess.memoryUnit.in.ex(1) := io.memoryStage.inst1.ex
+  dataMemoryAccess.dataMemory.in.rdata := io.dataMemory.in.rdata
+  dataMemoryAccess.memoryUnit.in.llbit := io.memoryStage.inst0.mem.llbit
+  io.dataMemory.out := dataMemoryAccess.dataMemory.out
+  // register write information of both slots is also sent to the decoder unit
+  io.decoderUnit(0).wen := io.writeBackStage.inst0.inst_info.reg_wen
+  io.decoderUnit(0).waddr := io.writeBackStage.inst0.inst_info.reg_waddr
+  io.decoderUnit(0).wdata := io.writeBackStage.inst0.rd_info.wdata
+  io.decoderUnit(1).wen := io.writeBackStage.inst1.inst_info.reg_wen
+  io.decoderUnit(1).waddr := io.writeBackStage.inst1.inst_info.reg_waddr
+  io.decoderUnit(1).wdata := io.writeBackStage.inst1.rd_info.wdata
+  // slot 0: pass through, replacing wdata with the load result when mem_wreg is set
+  io.writeBackStage.inst0.pc := io.memoryStage.inst0.pc
+  io.writeBackStage.inst0.inst_info := io.memoryStage.inst0.inst_info
+  io.writeBackStage.inst0.rd_info.wdata := Mux(
+    io.writeBackStage.inst0.inst_info.mem_wreg,
+    dataMemoryAccess.memoryUnit.out.rdata,
+    io.memoryStage.inst0.rd_info.wdata,
+  )
+  io.writeBackStage.inst0.ex := io.memoryStage.inst0.ex // bulk copy; excode, tlb_refill and flush_req are overridden below
+  val inst0_access_mem =
+    (io.dataMemory.out.en && (io.dataMemory.in.tlb.invalid || io.dataMemory.in.tlb.refill) && io.memoryStage.inst0.inst_info.fusel === FU_MEM)
+  val inst0_tlbmod =
+    (io.dataMemory.in.tlb.modify && io.dataMemory.out.wen.orR && io.memoryStage.inst0.inst_info.fusel === FU_MEM)
+  io.writeBackStage.inst0.ex.excode := MuxCase( // an exception already recorded wins over the data-TLB ones
+    io.memoryStage.inst0.ex.excode,
+    Seq(
+      (io.memoryStage.inst0.ex.excode =/= EX_NO) -> io.memoryStage.inst0.ex.excode,
+      inst0_access_mem -> Mux(io.dataMemory.out.wen.orR, EX_TLBS, EX_TLBL), // store vs load decided by a non-zero write mask
+      inst0_tlbmod -> EX_MOD,
+    ),
+  )
+  io.writeBackStage.inst0.ex.tlb_refill := io.memoryStage.inst0.ex.tlb_refill && io.memoryStage.inst0.ex.excode === EX_TLBL || io.dataMemory.in.tlb.refill && io.memoryStage.inst0.inst_info.fusel === FU_MEM
+  io.writeBackStage.inst0.ex.flush_req := io.memoryStage.inst0.ex.flush_req || io.writeBackStage.inst0.ex.excode =/= EX_NO || io.writeBackStage.inst0.ex.tlb_refill
+  io.writeBackStage.inst0.cp0 := io.memoryStage.inst0.cp0
+  // slot 1: same treatment as slot 0, without the CP0 snapshot
+  io.writeBackStage.inst1.pc := io.memoryStage.inst1.pc
+  io.writeBackStage.inst1.inst_info := io.memoryStage.inst1.inst_info
+  io.writeBackStage.inst1.rd_info.wdata := Mux(
+    io.writeBackStage.inst1.inst_info.mem_wreg,
+    dataMemoryAccess.memoryUnit.out.rdata,
+    io.memoryStage.inst1.rd_info.wdata,
+  )
+  io.writeBackStage.inst1.ex := io.memoryStage.inst1.ex // bulk copy; excode, tlb_refill and flush_req are overridden below
+  val inst1_access_mem =
+    (io.dataMemory.out.en && (io.dataMemory.in.tlb.invalid || io.dataMemory.in.tlb.refill) && io.memoryStage.inst1.inst_info.fusel === FU_MEM)
+  val inst1_tlbmod =
+    (io.dataMemory.in.tlb.modify && io.dataMemory.out.wen.orR && io.memoryStage.inst1.inst_info.fusel === FU_MEM)
+  io.writeBackStage.inst1.ex.excode := MuxCase(
+    io.memoryStage.inst1.ex.excode,
+    Seq(
+      (io.memoryStage.inst1.ex.excode =/= EX_NO) -> io.memoryStage.inst1.ex.excode,
+      inst1_access_mem -> Mux(io.dataMemory.out.wen.orR, EX_TLBS, EX_TLBL),
+      inst1_tlbmod -> EX_MOD,
+    ),
+  )
+  io.writeBackStage.inst1.ex.tlb_refill := io.memoryStage.inst1.ex.tlb_refill && io.memoryStage.inst1.ex.excode === EX_TLBL || io.dataMemory.in.tlb.refill && io.memoryStage.inst1.inst_info.fusel === FU_MEM
+  io.writeBackStage.inst1.ex.flush_req := io.memoryStage.inst1.ex.flush_req || io.writeBackStage.inst1.ex.excode =/= EX_NO || io.writeBackStage.inst1.ex.tlb_refill
+  // CP0 sees the PCs and the final exception info of both slots
+  io.cp0.in.inst(0).pc := io.writeBackStage.inst0.pc
+  io.cp0.in.inst(0).ex := io.writeBackStage.inst0.ex
+  io.cp0.in.inst(1).pc := io.writeBackStage.inst1.pc
+  io.cp0.in.inst(1).ex := io.writeBackStage.inst1.ex
+  // flush on a CP0 flush, or when slot 0 is an MTC0 that is allowed to go (fetch then restarts at pc + 4)
+  io.fetchUnit.flush := Mux(
+    io.cp0.out.flush,
+    io.cp0.out.flush,
+    io.writeBackStage.inst0.inst_info.op === EXE_MTC0 && io.ctrl.allow_to_go,
+  )
+  io.fetchUnit.flush_pc := Mux(io.cp0.out.flush, io.cp0.out.flush_pc, io.writeBackStage.inst0.pc + 4.U)
+
+  io.ctrl.flush_req := io.fetchUnit.flush
+  io.ctrl.eret := io.writeBackStage.inst0.ex.eret
+}
diff --git a/chisel/playground/src/pipeline/writeback/CommitBuffer.scala b/chisel/playground/src/pipeline/writeback/CommitBuffer.scala
new file mode 100644
index 0000000..4ac033b
--- /dev/null
+++ b/chisel/playground/src/pipeline/writeback/CommitBuffer.scala
@@ -0,0 +1,99 @@
+package cpu.pipeline.writeback
+
+import chisel3._
+import chisel3.util._
+// Queue with two enqueue ports and one dequeue port for register-write trace records.
+class CommitBuffer(
+    depth: Int = 32, // number of records; NOTE(review): pointers wrap by truncation, which presumably requires a power of two
+) extends Module {
+  val io = IO(new Bundle {
+    val flush = Input(Bool())
+    val enq = Flipped(
+      Vec(
+        2,
+        new Bundle {
+          val wb_pc = Output(UInt(32.W))
+          val wb_rf_wen = Output(UInt(4.W))
+          val wb_rf_wnum = Output(UInt(5.W))
+          val wb_rf_wdata = Output(UInt(32.W))
+        },
+      ),
+    )
+    val deq = new Bundle {
+      val wb_pc = Output(UInt(32.W))
+      val wb_rf_wen = Output(UInt(4.W))
+      val wb_rf_wnum = Output(UInt(5.W))
+      val wb_rf_wdata = Output(UInt(32.W))
+    }
+  })
+  // storage is a register vector, reset to all zeros
+  val ram = RegInit(VecInit(Seq.fill(depth)(0.U.asTypeOf(new Bundle {
+    val wb_pc = UInt(32.W)
+    val wb_rf_wen = UInt(4.W)
+    val wb_rf_wnum = UInt(5.W)
+    val wb_rf_wdata = UInt(32.W)
+  }))))
+  val enq_ptr = RegInit(0.U(log2Ceil(depth).W))
+  val deq_ptr = RegInit(0.U(log2Ceil(depth).W))
+  val maybe_full = RegInit(false.B) // distinguishes full from empty when the two pointers are equal
+  val ptr_match = enq_ptr === deq_ptr
+  val empty = ptr_match && !maybe_full
+  val full = ptr_match && maybe_full
+  val do_enq = 
Wire(Vec(2, Bool()))
+  val do_deq = WireDefault(io.deq.wb_rf_wen.orR) // a record with a non-zero wen at the head is dequeued in the same cycle
+  // a port enqueues whenever its wen is non-zero
+  for { i <- 0 until 2 } {
+    do_enq(i) := io.enq(i).wb_rf_wen.orR
+  }
+
+  val next_enq_ptr = MuxCase(
+    enq_ptr,
+    Seq(
+      io.flush -> 0.U,
+      (do_enq(0) && do_enq(1)) -> (enq_ptr + 2.U),
+      (do_enq(0) || do_enq(1)) -> (enq_ptr + 1.U),
+    ),
+  )
+
+  when(do_enq(0)) {
+    ram(enq_ptr) := io.enq(0)
+  }
+  // port 1 lands right behind port 0 when both enqueue, otherwise at enq_ptr itself
+  val enq1_ptr = Mux(do_enq(0), enq_ptr + 1.U, enq_ptr)
+  when(do_enq(1)) {
+    ram(enq1_ptr) := io.enq(1)
+  }
+
+  val next_deq_ptr =
+    Mux(do_deq, deq_ptr + 1.U, deq_ptr)
+
+  when(do_enq(0) =/= do_deq) {
+    maybe_full := do_enq(0)
+  }
+
+  when(do_enq(1)) {
+    maybe_full := do_enq(1)
+  }
+  // flush is connected after the updates above and therefore overrides them
+  when(io.flush) {
+    enq_ptr := 0.U
+    deq_ptr := 0.U
+    maybe_full := false.B
+  }.otherwise {
+    enq_ptr := next_enq_ptr
+    deq_ptr := next_deq_ptr
+  }
+  // clear wen of the record being dequeued
+  when(do_deq) {
+    ram(deq_ptr).wb_rf_wen := 0.U
+  }
+
+  when(empty) {
+    do_deq := false.B
+    io.deq := DontCare
+    io.deq.wb_rf_wen := 0.U
+  }.otherwise {
+    io.deq := ram(deq_ptr)
+  }
+
+}
diff --git a/chisel/playground/src/pipeline/writeback/WriteBackStage.scala b/chisel/playground/src/pipeline/writeback/WriteBackStage.scala
new file mode 100644
index 0000000..f9f5a53
--- /dev/null
+++ b/chisel/playground/src/pipeline/writeback/WriteBackStage.scala
@@ -0,0 +1,46 @@
+package cpu.pipeline.writeback
+
+import chisel3._
+import chisel3.util._
+import cpu.defines._
+import cpu.defines.Const._
+import cpu.CpuConfig
+import cpu.pipeline.memory.Cp0Info
+// Memory-to-write-back payload of the second issue slot.
+class MemWbInst1 extends Bundle {
+  val pc = UInt(PC_WID.W)
+  val inst_info = new InstInfo()
+  val rd_info = new RdInfo()
+  val ex = new ExceptionInfo()
+}
+class MemWbInst0 extends MemWbInst1 { // first issue slot: additionally carries the CP0 snapshot
+  val cp0 = new Cp0Info()
+}
+
+class MemoryUnitWriteBackUnit extends Bundle {
+  val inst0 = new MemWbInst0()
+  val inst1 = new MemWbInst1()
+}
+class WriteBackStage(implicit val config: CpuConfig) extends Module { // pipeline register: zeroed on clear, loaded on allow_to_go, otherwise held
+  val io = IO(new Bundle {
+    val ctrl = Input(new Bundle {
+      val allow_to_go = Bool()
+      val clear = Bool()
+    })
+    val memoryUnit = Input(new MemoryUnitWriteBackUnit())
+    val writeBackUnit = Output(new MemoryUnitWriteBackUnit())
+  })
+  val inst0 = RegInit(0.U.asTypeOf(new MemWbInst0()))
+  val inst1 = RegInit(0.U.asTypeOf(new MemWbInst1()))
+  // FIX: clear is a Bool, so test it directly instead of extracting bit 0; clear has priority over allow_to_go
+  when(io.ctrl.clear) {
+    inst0 := 0.U.asTypeOf(new MemWbInst0())
+    inst1 := 0.U.asTypeOf(new MemWbInst1())
+  }.elsewhen(io.ctrl.allow_to_go) {
+    inst0 := io.memoryUnit.inst0
+    inst1 := io.memoryUnit.inst1
+  }
+
+  io.writeBackUnit.inst0 := inst0
+  io.writeBackUnit.inst1 := inst1
+}
diff --git a/chisel/playground/src/pipeline/writeback/WriteBackUnit.scala b/chisel/playground/src/pipeline/writeback/WriteBackUnit.scala
new file mode 100644
index 0000000..28efd79
--- /dev/null
+++ b/chisel/playground/src/pipeline/writeback/WriteBackUnit.scala
@@ -0,0 +1,78 @@
+package cpu.pipeline.writeback
+
+import chisel3._
+import chisel3.util._
+import cpu.defines._
+import cpu.defines.Const._
+import cpu.pipeline.decoder.RegWrite
+import cpu.CpuConfig
+// Write-back logic: drives the register-file write ports and the debug trace port (plus statistics when not building).
+class WriteBackUnit(implicit val config: CpuConfig) extends Module {
+  val io = IO(new Bundle {
+    val ctrl = new WriteBackCtrl()
+    val writeBackStage = Input(new MemoryUnitWriteBackUnit())
+    val regfile = Output(Vec(config.commitNum, new RegWrite()))
+    val debug = new DEBUG()
+    val statistic = if (!config.build) Some(new SocStatistic()) else None
+  })
+  // slot 0 writes only when allowed to go and not requesting a flush
+  io.regfile(0)
+    .wen := io.writeBackStage.inst0.inst_info.reg_wen && io.ctrl.allow_to_go && !io.writeBackStage.inst0.ex.flush_req
+  io.regfile(0).waddr := io.writeBackStage.inst0.inst_info.reg_waddr
+  io.regfile(0).wdata := io.writeBackStage.inst0.rd_info.wdata
+  // slot 1 is additionally suppressed when slot 0 requests a flush
+  io.regfile(1).wen :=
+    io.writeBackStage.inst1.inst_info.reg_wen && io.ctrl.allow_to_go && !io.writeBackStage.inst0.ex.flush_req && !io.writeBackStage.inst1.ex.flush_req
+  io.regfile(1).waddr := io.writeBackStage.inst1.inst_info.reg_waddr
+  io.regfile(1).wdata := io.writeBackStage.inst1.rd_info.wdata
+  // debug trace: either serialised through a CommitBuffer or multiplexed on the clock level
+  if (config.hasCommitBuffer) {
+    val buffer = Module(new CommitBuffer()).io
+    buffer.enq(0).wb_pc := io.writeBackStage.inst0.pc
+    buffer.enq(0).wb_rf_wen := Fill(4, io.regfile(0).wen) // FIX: replicate to all 4 bits, matching the non-buffered path below
+    buffer.enq(0).wb_rf_wnum := io.regfile(0).waddr
+    buffer.enq(0).wb_rf_wdata := io.regfile(0).wdata
+    buffer.enq(1).wb_pc := io.writeBackStage.inst1.pc
+    buffer.enq(1).wb_rf_wen := Fill(4, io.regfile(1).wen) // FIX: replicate to all 4 bits, matching the non-buffered path below
+    buffer.enq(1).wb_rf_wnum := io.regfile(1).waddr
+    buffer.enq(1).wb_rf_wdata := io.regfile(1).wdata
+    buffer.flush := io.ctrl.do_flush
+
+    io.debug.wb_pc := buffer.deq.wb_pc
+    io.debug.wb_rf_wen := buffer.deq.wb_rf_wen
+    io.debug.wb_rf_wnum := buffer.deq.wb_rf_wnum
+    io.debug.wb_rf_wdata := buffer.deq.wb_rf_wdata
+  } else {
+    io.debug.wb_pc := Mux( // slot 0 while the clock is high, slot 1 while it is low
+      clock.asBool,
+      io.writeBackStage.inst0.pc,
+      Mux(io.writeBackStage.inst0.ex.flush_req, 0.U, io.writeBackStage.inst1.pc),
+    )
+    io.debug.wb_rf_wen := Mux(
+      clock.asBool,
+      Fill(4, io.regfile(0).wen),
+      Fill(4, io.regfile(1).wen),
+    )
+    io.debug.wb_rf_wnum := Mux(
+      clock.asBool,
+      io.regfile(0).waddr,
+      io.regfile(1).waddr,
+    )
+    io.debug.wb_rf_wdata := Mux(
+      clock.asBool,
+      io.regfile(0).wdata,
+      io.regfile(1).wdata,
+    )
+  }
+
+  // ===----------------------------------------------------------------===
+  // statistic
+  // ===----------------------------------------------------------------===
+  if (!config.build) { // same condition that creates io.statistic, so .get is defined here
+    io.statistic.get.cp0_cause := io.writeBackStage.inst0.cp0.cp0_cause
+    io.statistic.get.cp0_count := io.writeBackStage.inst0.cp0.cp0_count
+    io.statistic.get.cp0_random := io.writeBackStage.inst0.cp0.cp0_random
+    io.statistic.get.int := io.writeBackStage.inst0.ex.excode === EX_INT
+    io.statistic.get.commit := io.ctrl.allow_to_go
+  }
+}
diff --git a/chisel/playground/test/src/GCDSpec.scala b/chisel/playground/test/src/GCDSpec.scala
deleted file mode 100644
index c6084a3..0000000
--- a/chisel/playground/test/src/GCDSpec.scala
+++ /dev/null
@@ -1,48 +0,0 @@
-import chisel3._
-import chiseltest._
-import chisel3.experimental.BundleLiterals._
-
-import utest._
-
-/**
-  * This is a trivial example of how to run this 
Specification - * From within sbt use: - * {{{ - * testOnly gcd.GcdDecoupledTester - * }}} - * From a terminal shell use: - * {{{ - * sbt 'testOnly gcd.GcdDecoupledTester' - * }}} - */ -object GCDSpec extends ChiselUtestTester { - val tests = Tests { - test("GCD") { - testCircuit(new DecoupledGcd(16)) { - dut => - dut.input.initSource() - dut.input.setSourceClock(dut.clock) - dut.output.initSink() - dut.output.setSinkClock(dut.clock) - val testValues = for {x <- 0 to 10; y <- 0 to 10} yield (x, y) - val inputSeq = testValues.map { case (x, y) => (new GcdInputBundle(16)).Lit(_.value1 -> x.U, _.value2 -> y.U) } - val resultSeq = testValues.map { case (x, y) => - (new GcdOutputBundle(16)).Lit(_.value1 -> x.U, _.value2 -> y.U, _.gcd -> BigInt(x).gcd(BigInt(y)).U) - } - fork { - // push inputs into the calculator, stall for 11 cycles one third of the way - val (seq1, seq2) = inputSeq.splitAt(resultSeq.length / 3) - dut.input.enqueueSeq(seq1) - dut.clock.step(11) - dut.input.enqueueSeq(seq2) - }.fork { - // retrieve computations from the calculator, stall for 10 cycles one half of the way - val (seq1, seq2) = resultSeq.splitAt(resultSeq.length / 2) - dut.output.expectDequeueSeq(seq1) - dut.clock.step(10) - dut.output.expectDequeueSeq(seq2) - }.join() - } - } - } -} diff --git a/chisel/playground/test/src/test.scala b/chisel/playground/test/src/test.scala new file mode 100644 index 0000000..7e3e19e --- /dev/null +++ b/chisel/playground/test/src/test.scala @@ -0,0 +1,18 @@ +// package cpu + +// import chisel3.stage.ChiselGeneratorAnnotation + +// import cpu.CpuConfig +// import cpu.pipeline.execute._ +// import cpu.pipeline.memory.DataMemoryAccess +// import cpu.pipeline.memory.MemoryUnit +// import cpu.pipeline.writeback.WriteBackUnit +// import cpu.pipeline.fetch.PreDecoder + +// object testMain extends App { +// implicit val config = new CpuConfig() +// (new chisel3.stage.ChiselStage).execute( +// Array("--target-dir", "generated"), +// Seq(ChiselGeneratorAnnotation(() 
=> new PreDecoder())), +// ) +// }