From 301199c756b99ebcba8906a2e542c98c99138acc Mon Sep 17 00:00:00 2001 From: Liphen Date: Thu, 21 Dec 2023 15:24:57 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0icache=E6=88=90?= =?UTF-8?q?=E5=8A=9F=E7=94=9F=E6=88=90Verilog?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- chisel/playground/resources/top_axi_wrapper.v | 80 +++--- chisel/playground/src/Core.scala | 5 + chisel/playground/src/cache/Cache.scala | 5 +- .../src/cache/CacheAXIInterface.scala | 86 +++---- chisel/playground/src/cache/DCache.scala | 52 ++-- chisel/playground/src/cache/ICache.scala | 243 ++++++++++++++---- chisel/playground/src/cache/mmu/ITlbL1.scala | 21 ++ chisel/playground/src/defines/Bundles.scala | 94 +++---- chisel/playground/src/defines/Const.scala | 1 + .../playground/src/defines/TlbBundles.scala | 124 +++++++++ 10 files changed, 502 insertions(+), 209 deletions(-) create mode 100644 chisel/playground/src/cache/mmu/ITlbL1.scala create mode 100644 chisel/playground/src/defines/TlbBundles.scala diff --git a/chisel/playground/resources/top_axi_wrapper.v b/chisel/playground/resources/top_axi_wrapper.v index 7fd3315..647dafe 100644 --- a/chisel/playground/resources/top_axi_wrapper.v +++ b/chisel/playground/resources/top_axi_wrapper.v @@ -47,46 +47,46 @@ module top_axi_wrapper( ); PuaCpu core( - .clock (clock), - .reset (reset), - // Interrupts - .io_ext_int_ei (MEI), // to PLIC - .io_ext_int_si (MSI), // to CLINT - .io_ext_int_ti (MTI), // to CLINT - // aw - .io_axi_aw_id (MAXI_awid), - .io_axi_aw_addr (MAXI_awaddr), - .io_axi_aw_len (MAXI_awlen), - .io_axi_aw_size (MAXI_awsize), - .io_axi_aw_burst (MAXI_awburst), - .io_axi_aw_valid (MAXI_awvalid), - .io_axi_aw_ready (MAXI_awready), - // w - .io_axi_w_data (MAXI_wdata), - .io_axi_w_strb (MAXI_wstrb), - .io_axi_w_last (MAXI_wlast), - .io_axi_w_valid (MAXI_wvalid), - .io_axi_w_ready (MAXI_wready), - // b - .io_axi_b_id (MAXI_bid), - .io_axi_b_resp (MAXI_bresp), - .io_axi_b_valid (MAXI_bvalid), - .io_axi_b_ready (MAXI_bready), - // ar - .io_axi_ar_id (MAXI_arid), - .io_axi_ar_addr (MAXI_araddr), - .io_axi_ar_len (MAXI_arlen), - .io_axi_ar_size (MAXI_arsize), - .io_axi_ar_burst (MAXI_arburst), - .io_axi_ar_valid (MAXI_arvalid), - .io_axi_ar_ready (MAXI_arready), - // r - .io_axi_r_id (MAXI_rid), - .io_axi_r_data (MAXI_rdata), - .io_axi_r_resp (MAXI_rresp), - .io_axi_r_last (MAXI_rlast), - .io_axi_r_valid (MAXI_rvalid), - .io_axi_r_ready (MAXI_rready), + .clock (clock), + .reset (reset), + // Interrupts + .io_ext_int_ei (MEI), // to PLIC + .io_ext_int_si (MSI), // to CLINT + .io_ext_int_ti (MTI), // to CLINT + // aw + .io_axi_aw_bits_id (MAXI_awid), + .io_axi_aw_bits_addr (MAXI_awaddr), + .io_axi_aw_bits_len (MAXI_awlen), + .io_axi_aw_bits_size (MAXI_awsize), + .io_axi_aw_bits_burst (MAXI_awburst), + .io_axi_aw_valid (MAXI_awvalid), + .io_axi_aw_ready (MAXI_awready), + // w + .io_axi_w_bits_data (MAXI_wdata), + .io_axi_w_bits_strb (MAXI_wstrb), + .io_axi_w_bits_last (MAXI_wlast), + .io_axi_w_valid (MAXI_wvalid), + .io_axi_w_ready (MAXI_wready), + // b + .io_axi_b_bits_id (MAXI_bid), + .io_axi_b_bits_resp (MAXI_bresp), + .io_axi_b_valid (MAXI_bvalid), + .io_axi_b_ready (MAXI_bready), + // ar + .io_axi_ar_bits_id (MAXI_arid), + .io_axi_ar_bits_addr (MAXI_araddr), + .io_axi_ar_bits_len (MAXI_arlen), + .io_axi_ar_bits_size (MAXI_arsize), + .io_axi_ar_bits_burst (MAXI_arburst), + .io_axi_ar_valid (MAXI_arvalid), + .io_axi_ar_ready (MAXI_arready), + // r + .io_axi_r_bits_id (MAXI_rid), + .io_axi_r_bits_data (MAXI_rdata), + .io_axi_r_bits_resp (MAXI_rresp), + .io_axi_r_bits_last (MAXI_rlast), + .io_axi_r_valid (MAXI_rvalid), + .io_axi_r_ready (MAXI_rready), // debug .io_debug_wb_pc (debug_pc), .io_debug_wb_rf_wen (debug_commit), diff --git a/chisel/playground/src/Core.scala b/chisel/playground/src/Core.scala index 8f2480f..c6b8ef3 100644 --- a/chisel/playground/src/Core.scala +++ b/chisel/playground/src/Core.scala @@ -15,6 +15,7 @@ import ctrl._ import mmu._ import chisel3.util.experimental.decode.decoder import cpu.pipeline.fetch.InstFifo +import cache.mmu.ITlbL1 class Core(implicit val config: CpuConfig) extends Module { val io = IO(new Bundle { @@ -37,6 +38,10 @@ class Core(implicit val config: CpuConfig) extends Module { val memoryUnit = Module(new MemoryUnit()).io val writeBackStage = Module(new WriteBackStage()).io val writeBackUnit = Module(new WriteBackUnit()).io + val tlbL1I = Module(new ITlbL1()).io + + tlbL1I.addr := fetchUnit.iCache.pc + tlbL1I.cache <> io.inst.tlb ctrl.decoderUnit <> decoderUnit.ctrl ctrl.executeUnit <> executeUnit.ctrl diff --git a/chisel/playground/src/cache/Cache.scala b/chisel/playground/src/cache/Cache.scala index 8d4f65f..11a9fa2 100644 --- a/chisel/playground/src/cache/Cache.scala +++ b/chisel/playground/src/cache/Cache.scala @@ -4,6 +4,7 @@ import chisel3._ import chisel3.util._ import cpu.defines._ import cpu.CpuConfig +import cpu.CacheConfig class Cache(implicit config: CpuConfig) extends Module { val io = IO(new Bundle { @@ -12,7 +13,9 @@ class Cache(implicit config: CpuConfig) extends Module { val axi = new AXI() }) - val icache = Module(new ICache()) + implicit val iCacheConfig = CacheConfig(nset = 64, nbank = 4, bankWidth = 16) + + val icache = Module(new ICache(iCacheConfig)) val dcache = Module(new DCache()) val axi_interface = Module(new CacheAXIInterface()) diff --git a/chisel/playground/src/cache/CacheAXIInterface.scala b/chisel/playground/src/cache/CacheAXIInterface.scala index 242b028..12b3692 100644 --- a/chisel/playground/src/cache/CacheAXIInterface.scala +++ b/chisel/playground/src/cache/CacheAXIInterface.scala @@ -12,32 +12,32 @@ class CacheAXIInterface extends Module { }) // pass-through aw { - io.axi.aw.id := io.dcache.aw.id - io.axi.aw.addr := io.dcache.aw.addr - io.axi.aw.len := io.dcache.aw.len - io.axi.aw.size := io.dcache.aw.size - io.axi.aw.burst := io.dcache.aw.burst - io.axi.aw.valid := io.dcache.aw.valid - io.axi.aw.prot := io.dcache.aw.prot - io.axi.aw.cache := io.dcache.aw.cache - io.axi.aw.lock := io.dcache.aw.lock - io.dcache.aw.ready := io.axi.aw.ready + io.axi.aw.bits.id := io.dcache.aw.bits.id + io.axi.aw.bits.addr := io.dcache.aw.bits.addr + io.axi.aw.bits.len := io.dcache.aw.bits.len + io.axi.aw.bits.size := io.dcache.aw.bits.size + io.axi.aw.bits.burst := io.dcache.aw.bits.burst + io.axi.aw.valid := io.dcache.aw.valid + io.axi.aw.bits.prot := io.dcache.aw.bits.prot + io.axi.aw.bits.cache := io.dcache.aw.bits.cache + io.axi.aw.bits.lock := io.dcache.aw.bits.lock + io.dcache.aw.ready := io.axi.aw.ready // pass-through aw } // pass-through w { - io.axi.w.id := io.dcache.w.id - io.axi.w.data := io.dcache.w.data - io.axi.w.strb := io.dcache.w.strb - io.axi.w.last := io.dcache.w.last - io.axi.w.valid := io.dcache.w.valid - io.dcache.w.ready := io.axi.w.ready + io.axi.w.bits.id := io.dcache.w.bits.id + io.axi.w.bits.data := io.dcache.w.bits.data + io.axi.w.bits.strb := io.dcache.w.bits.strb + io.axi.w.bits.last := io.dcache.w.bits.last + io.axi.w.valid := io.dcache.w.valid + io.dcache.w.ready := io.axi.w.ready // pass-through aw } // pass-through b { - io.dcache.b.id := io.axi.b.id - io.dcache.b.valid := io.axi.b.valid - io.dcache.b.resp := io.axi.b.resp - io.axi.b.ready := io.dcache.b.ready + io.dcache.b.bits.id := io.axi.b.bits.id + io.dcache.b.valid := io.axi.b.valid + io.dcache.b.bits.resp := io.axi.b.bits.resp + io.axi.b.ready := io.dcache.b.ready // pass-through b } // mux ar { @@ -55,31 +55,31 @@ class CacheAXIInterface extends Module { } } - io.axi.ar.id := Cat(0.U(3.W), ar_sel) - io.axi.ar.addr := Mux(ar_sel, io.dcache.ar.addr, io.icache.ar.addr) - io.axi.ar.len := Mux(ar_sel, io.dcache.ar.len, io.icache.ar.len) - io.axi.ar.size := Mux(ar_sel, io.dcache.ar.size, io.icache.ar.size) - io.axi.ar.burst := Mux(ar_sel, io.dcache.ar.burst, io.icache.ar.burst) - io.axi.ar.valid := Mux(ar_sel, io.dcache.ar.valid, io.icache.ar.valid) - io.axi.ar.prot := Mux(ar_sel, io.dcache.ar.prot, io.icache.ar.prot) - io.axi.ar.cache := Mux(ar_sel, io.dcache.ar.cache, io.icache.ar.cache) - io.axi.ar.lock := Mux(ar_sel, io.dcache.ar.lock, io.icache.ar.lock) - io.icache.ar.ready := !ar_sel && io.axi.ar.ready - io.dcache.ar.ready := ar_sel && io.axi.ar.ready + io.axi.ar.bits.id := Cat(0.U(3.W), ar_sel) + io.axi.ar.bits.addr := Mux(ar_sel, io.dcache.ar.bits.addr, io.icache.ar.bits.addr) + io.axi.ar.bits.len := Mux(ar_sel, io.dcache.ar.bits.len, io.icache.ar.bits.len) + io.axi.ar.bits.size := Mux(ar_sel, io.dcache.ar.bits.size, io.icache.ar.bits.size) + io.axi.ar.bits.burst := Mux(ar_sel, io.dcache.ar.bits.burst, io.icache.ar.bits.burst) + io.axi.ar.valid := Mux(ar_sel, io.dcache.ar.valid, io.icache.ar.valid) + io.axi.ar.bits.prot := Mux(ar_sel, io.dcache.ar.bits.prot, io.icache.ar.bits.prot) + io.axi.ar.bits.cache := Mux(ar_sel, io.dcache.ar.bits.cache, io.icache.ar.bits.cache) + io.axi.ar.bits.lock := Mux(ar_sel, io.dcache.ar.bits.lock, io.icache.ar.bits.lock) + io.icache.ar.ready := !ar_sel && io.axi.ar.ready + io.dcache.ar.ready := ar_sel && io.axi.ar.ready // mux ar } // mux r based on rid { - val r_sel = io.axi.r.id(0) - io.icache.r.id := io.axi.r.id - io.icache.r.data := io.axi.r.data - io.icache.r.resp := io.axi.r.resp - io.icache.r.last := io.axi.r.last - io.icache.r.valid := !r_sel && io.axi.r.valid - io.dcache.r.id := io.axi.r.id - io.dcache.r.data := io.axi.r.data - io.dcache.r.resp := io.axi.r.resp - io.dcache.r.last := io.axi.r.last - io.dcache.r.valid := r_sel && io.axi.r.valid - io.axi.r.ready := Mux(r_sel, io.dcache.r.ready, io.icache.r.ready) + val r_sel = io.axi.r.bits.id(0) + io.icache.r.bits.id := io.axi.r.bits.id + io.icache.r.bits.data := io.axi.r.bits.data + io.icache.r.bits.resp := io.axi.r.bits.resp + io.icache.r.bits.last := io.axi.r.bits.last + io.icache.r.valid := !r_sel && io.axi.r.valid + io.dcache.r.bits.id := io.axi.r.bits.id + io.dcache.r.bits.data := io.axi.r.bits.data + io.dcache.r.bits.resp := io.axi.r.bits.resp + io.dcache.r.bits.last := io.axi.r.bits.last + io.dcache.r.valid := r_sel && io.axi.r.valid + io.axi.r.ready := Mux(r_sel, io.dcache.r.ready, io.icache.r.ready) // mux r based on rid } } diff --git a/chisel/playground/src/cache/DCache.scala b/chisel/playground/src/cache/DCache.scala index f153706..0fc437e 100644 --- a/chisel/playground/src/cache/DCache.scala +++ b/chisel/playground/src/cache/DCache.scala @@ -24,39 +24,39 @@ class DCache(implicit config: CpuConfig) extends Module { val awvalid = RegInit(false.B) val awaddr = RegInit(0.U(AXI_ADDR_WID.W)) val awsize = RegInit(0.U(AXI_SIZE_WID.W)) - io.axi.aw.id := 1.U - io.axi.aw.addr := awaddr - io.axi.aw.len := 0.U - io.axi.aw.size := awsize - io.axi.aw.burst := BURST_INCR.U - io.axi.aw.valid := awvalid - io.axi.aw.prot := 0.U - io.axi.aw.lock := 0.U - io.axi.aw.cache := 0.U + io.axi.aw.bits.id := 1.U + io.axi.aw.bits.addr := awaddr + io.axi.aw.bits.len := 0.U + io.axi.aw.bits.size := awsize + io.axi.aw.bits.burst := BURST_INCR.U + io.axi.aw.valid := awvalid + io.axi.aw.bits.prot := 0.U + io.axi.aw.bits.lock := 0.U + io.axi.aw.bits.cache := 0.U val wvalid = RegInit(false.B) val wdata = RegInit(0.U(AXI_DATA_WID.W)) val wstrb = RegInit(0.U(AXI_STRB_WID.W)) - io.axi.w.id := 1.U - io.axi.w.data := wdata - io.axi.w.strb := wstrb - io.axi.w.last := 1.U - io.axi.w.valid := wvalid + io.axi.w.bits.id := 1.U + io.axi.w.bits.data := wdata + io.axi.w.bits.strb := wstrb + io.axi.w.bits.last := 1.U + io.axi.w.valid := wvalid io.axi.b.ready := 1.U val araddr = RegInit(0.U(AXI_ADDR_WID.W)) val arsize = RegInit(0.U(AXI_SIZE_WID.W)) val arvalid = RegInit(false.B) - io.axi.ar.id := 1.U - io.axi.ar.addr := araddr - io.axi.ar.len := 0.U - io.axi.ar.size := arsize - io.axi.ar.burst := BURST_INCR.U - io.axi.ar.valid := arvalid - io.axi.ar.prot := 0.U - io.axi.ar.cache := 0.U - io.axi.ar.lock := 0.U + io.axi.ar.bits.id := 1.U + io.axi.ar.bits.addr := araddr + io.axi.ar.bits.len := 0.U + io.axi.ar.bits.size := arsize + io.axi.ar.bits.burst := BURST_INCR.U + io.axi.ar.valid := arvalid + io.axi.ar.bits.prot := 0.U + io.axi.ar.bits.cache := 0.U + io.axi.ar.bits.lock := 0.U val rready = RegInit(false.B) io.axi.r.ready := rready @@ -102,8 +102,8 @@ class DCache(implicit config: CpuConfig) extends Module { arvalid := false.B } when(io.axi.r.valid) { - saved_rdata := io.axi.r.data - acc_err := io.axi.r.resp =/= RESP_OKEY.U + saved_rdata := io.axi.r.bits.data + acc_err := io.axi.r.bits.resp =/= RESP_OKEY.U status := s_idle } } @@ -115,7 +115,7 @@ class DCache(implicit config: CpuConfig) extends Module { wvalid := false.B } when(io.axi.b.valid) { - acc_err := io.axi.b.resp =/= RESP_OKEY.U + acc_err := io.axi.b.bits.resp =/= RESP_OKEY.U status := s_idle } } diff --git a/chisel/playground/src/cache/ICache.scala b/chisel/playground/src/cache/ICache.scala index 4bcf11c..efff650 100644 --- a/chisel/playground/src/cache/ICache.scala +++ b/chisel/playground/src/cache/ICache.scala @@ -1,70 +1,201 @@ -// * Cache 设计借鉴了nscscc2021 cqu的cdim * // package cache import chisel3._ import chisel3.util._ import memory._ +import cpu.CacheConfig import cpu.defines._ import cpu.CpuConfig import cpu.defines.Const._ -class ICache(implicit config: CpuConfig) extends Module { +class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Module { + val nway: Int = cacheConfig.nway + val nset: Int = cacheConfig.nset + val nbank: Int = cacheConfig.nbank + val ninst: Int = cacheConfig.ninst // 取指令的数量 + val bankOffsetWidth: Int = cacheConfig.bankOffsetWidth + val bankWidth: Int = cacheConfig.bankWidth + val tagWidth: Int = cacheConfig.tagWidth + val indexWidth: Int = cacheConfig.indexWidth + val offsetWidth: Int = cacheConfig.offsetWidth val io = IO(new Bundle { val cpu = Flipped(new Cache_ICache()) val axi = new ICache_AXIInterface() }) + require(isPow2(ninst), "ninst must be power of 2") + // * addr organization * // + // ====================================== + // | tag | index |offset| + // |31 12|11 6|5 0| + // ====================================== + // | offset | + // | bank index | bank offset | + // | 5 4 | 3 2 | + // ============================ + val tlb_fill = RegInit(false.B) + // * fsm * // val s_idle :: s_uncached :: s_replace :: s_save :: Nil = Enum(4) - val status = RegInit(s_idle) + val state = RegInit(s_idle) - val read_next_addr = (status === s_idle || status === s_save) - val pc = Cat(io.cpu.addr(read_next_addr)(31, 2), 0.U(2.W)) + // * nway * nset * // + // * 128 bit for 4 inst * // + // ========================================================= + // | valid | tag | bank 0 | bank 1 | bank 2 | bank 3 | + // | 1 | 20 | 128 | 128 | 128 | 128 | + // ========================================================= + // | bank | + // | inst 0 | inst 1 | inst 2 | inst 3 | + // | 32 | 32 | 32 | 32 | + // ===================================== + val instperbank = bankWidth / 4 // 每个bank存储的指令数 + val valid = RegInit(VecInit(Seq.fill(nset * nbank)(VecInit(Seq.fill(instperbank)(false.B))))) - // default - val arvalid = RegInit(false.B) - val araddr = RegInit(0.U(AXI_ADDR_WID.W)) - io.axi.ar.id := 0.U - io.axi.ar.addr := araddr - io.axi.ar.len := 0.U - io.axi.ar.size := 2.U - io.axi.ar.lock := 0.U - io.axi.ar.burst := BURST_INCR.U - io.axi.ar.valid := arvalid - io.axi.ar.prot := 0.U - io.axi.ar.cache := 0.U + val data = Wire(Vec(nway, Vec(instperbank, UInt(DATA_WID.W)))) + val tag = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W)))) - val rready = RegInit(false.B) - val saved = RegInit(VecInit(Seq.fill(config.instFetchNum)(0.U.asTypeOf(new Bundle { - val inst = UInt(AXI_DATA_WID.W) + // * should choose next addr * // + val should_next_addr = (state === s_idle && !tlb_fill) || (state === s_save) + + val data_raddr = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, bankOffsetWidth) + val data_wstrb = RegInit(VecInit(Seq.fill(nway)(VecInit(Seq.fill(instperbank)(0.U(4.W)))))) + + val tag_raddr = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, offsetWidth) + val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B))) + val tag_wdata = RegInit(0.U(tagWidth.W)) + + // * lru * // + val lru = RegInit(VecInit(Seq.fill(nset * nbank)(false.B))) + + // * itlb * // + when(tlb_fill) { tlb_fill := false.B } + io.cpu.tlb.fill := tlb_fill + io.cpu.tlb.icache_is_save := (state === s_save) + + // * fence * // + when(io.cpu.fence && !io.cpu.icache_stall && io.cpu.cpu_ready) { + valid.map(_ := VecInit(Seq.fill(instperbank)(false.B))) + } + + // * replace set * // + val rset = RegInit(0.U(6.W)) + + // * virtual set * // + val vset = io.cpu.addr(0)(indexWidth + offsetWidth - 1, offsetWidth) + + // * cache hit * // + val tag_compare_valid = VecInit(Seq.tabulate(nway)(i => tag(i) === io.cpu.tlb.tag && valid(vset)(i))) + val cache_hit = tag_compare_valid.contains(true.B) + val cache_hit_available = cache_hit && io.cpu.tlb.translation_ok && !io.cpu.tlb.uncached + val sel = tag_compare_valid(1) + + val bank_offset = io.cpu.addr(0)(log2Ceil(instperbank) + 1, 2) + val inst = VecInit( + Seq.tabulate(instperbank)(i => Mux(i.U <= (3.U - bank_offset), data(sel)(i.U + bank_offset), 0.U)) + ) + val inst_valid = VecInit(Seq.tabulate(instperbank)(i => cache_hit_available && i.U <= (3.U - bank_offset))) + + val saved = RegInit(VecInit(Seq.fill(instperbank)(0.U.asTypeOf(new Bundle { + val inst = UInt(PC_WID.W) val valid = Bool() })))) - io.axi.r.ready := true.B + + val axi_cnt = Counter(cacheConfig.burstSize) + + // bank tag ram + for { i <- 0 until nway; j <- 0 until instperbank } { + val bank = Module(new SimpleDualPortRam(nset * nbank, INST_BANK_WID, byteAddressable = true)) + bank.io.ren := true.B + bank.io.raddr := data_raddr + data(i)(j) := bank.io.rdata + + bank.io.wen := data_wstrb(i)(j).orR + bank.io.waddr := Cat(rset, axi_cnt.value(log2Ceil(cacheConfig.burstSize) - 1, log2Ceil(instperbank))) + bank.io.wdata := Mux( + j.U === axi_cnt.value(log2Ceil(instperbank) - 1, 0), + Mux(axi_cnt.value(0) === 0.U, io.axi.r.bits.data(31, 0), io.axi.r.bits.data(63, 32)), + 0.U + ) + bank.io.wstrb := data_wstrb(i)(j) + } + + for { i <- 0 until ninst } { + io.cpu.inst_valid(i) := Mux(state === s_idle && !tlb_fill, inst_valid(i), saved(i).valid) && io.cpu.req + io.cpu.inst(i) := Mux(state === s_idle && !tlb_fill, inst(i), saved(i).inst) + } + + for { i <- 0 until nway } { + val tag_bram = Module(new LUTRam(nset, tagWidth)) + tag_bram.io.raddr := tag_raddr + tag(i) := tag_bram.io.rdata + + tag_bram.io.wen := tag_wstrb(i) + tag_bram.io.waddr := rset + tag_bram.io.wdata := tag_wdata + } + + io.cpu.icache_stall := Mux(state === s_idle && !tlb_fill, (!cache_hit_available && io.cpu.req), state =/= s_save) + + val ar_init = WireInit(0.U.asTypeOf(new AR())) + ar_init.burst := 1.U + val ar = RegInit(ar_init) + val arvalid = RegInit(false.B) + ar <> io.axi.ar.bits + arvalid <> io.axi.ar.valid + + val r = RegInit(0.U.asTypeOf(new R())) + val rready = RegInit(false.B) + r <> io.axi.r.bits + rready <> io.axi.r.ready val acc_err = RegInit(false.B) - val addr_err = io.cpu.addr(read_next_addr)(63, 32).orR + val addr_err = io.cpu.addr(should_next_addr)(XLEN - 1, PADDR_WID).orR - (0 until config.instFetchNum).foreach(i => { - io.cpu.inst(i) := Mux(status === s_idle && !acc_err, 0.U, saved(i).inst) - io.cpu.inst_valid(i) := Mux(status === s_idle && !acc_err, false.B, saved(i).valid) && io.cpu.req - }) + when(acc_err) { acc_err := false.B } + io.cpu.acc_err := acc_err - io.cpu.acc_err := acc_err - io.cpu.icache_stall := Mux(status === s_idle && !acc_err, io.cpu.req, status =/= s_save) - - switch(status) { + switch(state) { is(s_idle) { - acc_err := false.B - when(io.cpu.req) { + when(tlb_fill) { + when(!io.cpu.tlb.hit) { + state := s_save + saved(0).inst := 0.U + saved(0).valid := true.B + } + }.elsewhen(io.cpu.req) { when(addr_err) { acc_err := true.B + state := s_save + saved(0).inst := 0.U saved(0).valid := true.B - status := s_save - }.otherwise { - araddr := pc - arvalid := true.B - io.axi.ar.len := 0.U - io.axi.ar.size := 2.U - status := s_uncached + }.elsewhen(!io.cpu.tlb.translation_ok) { + tlb_fill := true.B + }.elsewhen(io.cpu.tlb.uncached) { + state := s_uncached + ar.addr := io.cpu.tlb.pa + ar.len := 0.U(log2Ceil((nbank * bankWidth) / 4).W) + ar.size := 2.U(bankOffsetWidth.W) + arvalid := true.B + }.elsewhen(!cache_hit) { + state := s_replace + ar.addr := Cat(io.cpu.tlb.pa(31, 6), 0.U(6.W)) + ar.len := 15.U(log2Ceil((nbank * bankWidth) / 4).W) + ar.size := 2.U(bankOffsetWidth.W) + arvalid := true.B + + rset := vset + (0 until instperbank).foreach(i => data_wstrb(lru(vset))(i) := Mux(i.U === 0.U, 0xf.U, 0x0.U)) + tag_wstrb(lru(vset)) := true.B + tag_wdata := io.cpu.tlb.tag + valid(vset)(lru(vset)) := true.B + axi_cnt.reset() + }.elsewhen(!io.cpu.icache_stall) { + lru(vset) := ~sel + when(!io.cpu.cpu_ready) { + state := s_save + (1 until instperbank).foreach(i => saved(i).inst := data(sel)(i)) + (0 until instperbank).foreach(i => saved(i).valid := inst_valid(i)) + } } } } @@ -74,18 +205,40 @@ class ICache(implicit config: CpuConfig) extends Module { arvalid := false.B rready := true.B } - }.elsewhen(io.axi.r.valid && io.axi.r.ready) { - saved(0).inst := Mux(araddr(2), io.axi.r.data(63, 32), io.axi.r.data(31, 0)) + }.elsewhen(io.axi.r.fire) { + // * uncached not support burst transport * // + state := s_save + saved(0).inst := io.axi.r.bits.data saved(0).valid := true.B - acc_err := io.axi.r.resp =/= RESP_OKEY.U rready := false.B - status := s_save + acc_err := io.axi.r.bits.resp =/= RESP_OKEY.U + } + } + is(s_replace) { + when(io.axi.ar.valid) { + when(io.axi.ar.ready) { + arvalid := false.B + rready := true.B + } + }.elsewhen(io.axi.r.fire) { + // * burst transport * // + when(!io.axi.r.bits.last) { + axi_cnt.inc() + data_wstrb(lru(vset))(0) := data_wstrb(lru(vset))(instperbank - 1) + (1 until instperbank).foreach(i => data_wstrb(lru(vset))(i) := data_wstrb(lru(vset))(i - 1)) + }.otherwise { + rready := false.B + data_wstrb(lru(vset)) := 0.U.asTypeOf(Vec(instperbank, UInt(4.W))) + tag_wstrb(lru(vset)) := false.B + } + }.elsewhen(!io.axi.r.ready) { + state := s_idle } } is(s_save) { when(io.cpu.cpu_ready && !io.cpu.icache_stall) { - status := s_idle - (0 until config.instFetchNum).foreach(i => saved(i).valid := false.B) + state := s_idle + (0 until instperbank).foreach(i => saved(i).valid := false.B) } } } diff --git a/chisel/playground/src/cache/mmu/ITlbL1.scala b/chisel/playground/src/cache/mmu/ITlbL1.scala new file mode 100644 index 0000000..88ab185 --- /dev/null +++ b/chisel/playground/src/cache/mmu/ITlbL1.scala @@ -0,0 +1,21 @@ +package cache.mmu + +import chisel3._ +import chisel3.util._ +import cpu.defines._ +import cpu.defines.Const._ + +class ITlbL1 extends Module { + val io = IO(new Bundle { + val addr = Input(UInt(PC_WID.W)) + val cache = new Tlb_ICache() + }) + val vpn = io.addr(31, 12) + val direct_mapped = io.addr(31, 30) === 2.U(2.W) + + io.cache.uncached := AddressSpace.isMMIO(io.addr) + io.cache.translation_ok := true.B + io.cache.hit := true.B + io.cache.tag := io.addr(31, 12) + io.cache.pa := Cat(io.cache.tag, io.addr(11, 0)) +} diff --git a/chisel/playground/src/defines/Bundles.scala b/chisel/playground/src/defines/Bundles.scala index 3992d58..bad975e 100644 --- a/chisel/playground/src/defines/Bundles.scala +++ b/chisel/playground/src/defines/Bundles.scala @@ -113,6 +113,9 @@ class Cache_ICache(implicit val config: CpuConfig) extends Bundle { val inst_valid = Input(Vec(config.instFetchNum, Bool())) val acc_err = Input(Bool()) val icache_stall = Input(Bool()) // icache_stall + + // tlb + val tlb = new Tlb_ICache() } // cpu to dcache @@ -135,72 +138,55 @@ class Cache_DCache extends Bundle { // master -> slave class AR extends Bundle { - val id = Output(UInt(AXI_ID_WID.W)) - val addr = Output(UInt(AXI_ADDR_WID.W)) - val len = Output(UInt(AXI_LEN_WID.W)) - val size = Output(UInt(AXI_SIZE_WID.W)) - val burst = Output(UInt(AXI_BURST_WID.W)) - val lock = Output(UInt(AXI_LOCK_WID.W)) - val cache = Output(UInt(AXI_CACHE_WID.W)) - val prot = Output(UInt(AXI_PROT_WID.W)) - val valid = Output(Bool()) - - val ready = Input(Bool()) + val id = UInt(AXI_ID_WID.W) + val addr = UInt(AXI_ADDR_WID.W) + val len = UInt(AXI_LEN_WID.W) + val size = UInt(AXI_SIZE_WID.W) + val burst = UInt(AXI_BURST_WID.W) + val lock = UInt(AXI_LOCK_WID.W) + val cache = UInt(AXI_CACHE_WID.W) + val prot = UInt(AXI_PROT_WID.W) } class R extends Bundle { - val ready = Output(Bool()) - - val id = Input(UInt(AXI_ID_WID.W)) - val data = Input(UInt(AXI_DATA_WID.W)) - val resp = Input(UInt(AXI_RESP_WID.W)) - val last = Input(Bool()) - val valid = Input(Bool()) + val id = UInt(AXI_ID_WID.W) + val data = UInt(AXI_DATA_WID.W) + val resp = UInt(AXI_RESP_WID.W) + val last = Bool() } class AW extends Bundle { - val id = Output(UInt(AXI_ID_WID.W)) - val addr = Output(UInt(AXI_ADDR_WID.W)) - val len = Output(UInt(AXI_LEN_WID.W)) - val size = Output(UInt(AXI_SIZE_WID.W)) - val burst = Output(UInt(AXI_BURST_WID.W)) - val lock = Output(UInt(AXI_LOCK_WID.W)) - val cache = Output(UInt(AXI_CACHE_WID.W)) - val prot = Output(UInt(AXI_PROT_WID.W)) - val valid = Output(Bool()) - - val ready = Input(Bool()) + val id = UInt(AXI_ID_WID.W) + val addr = UInt(AXI_ADDR_WID.W) + val len = UInt(AXI_LEN_WID.W) + val size = UInt(AXI_SIZE_WID.W) + val burst = UInt(AXI_BURST_WID.W) + val lock = UInt(AXI_LOCK_WID.W) + val cache = UInt(AXI_CACHE_WID.W) + val prot = UInt(AXI_PROT_WID.W) } class W extends Bundle { - val id = Output(UInt(AXI_ID_WID.W)) - val data = Output(UInt(AXI_DATA_WID.W)) - val strb = Output(UInt(AXI_STRB_WID.W)) - val last = Output(Bool()) - val valid = Output(Bool()) - - val ready = Input(Bool()) + val id = UInt(AXI_ID_WID.W) + val data = UInt(AXI_DATA_WID.W) + val strb = UInt(AXI_STRB_WID.W) + val last = Bool() } class B extends Bundle { - val ready = Output(Bool()) - - val id = Input(UInt(AXI_ID_WID.W)) - val resp = Input(UInt(AXI_RESP_WID.W)) - val valid = Input(Bool()) + val id = UInt(AXI_ID_WID.W) + val resp = UInt(AXI_RESP_WID.W) } class ICache_AXIInterface extends Bundle { - val ar = new AR() - val r = new R() + val ar = Decoupled(new AR()) + val r = Flipped(Decoupled(new R())) } -class DCache_AXIInterface extends Bundle { - val aw = new AW() - val w = new W() - val b = new B() - val ar = new AR() - val r = new R() +class DCache_AXIInterface extends ICache_AXIInterface { + val aw = Decoupled(new AW()) + val w = Decoupled(new W()) + val b = Flipped(Decoupled(new B())) } class Cache_AXIInterface extends Bundle { @@ -211,11 +197,11 @@ class Cache_AXIInterface extends Bundle { // AXI interface class AXI extends Bundle { - val ar = new AR() // read address channel - val r = new R() // read data channel - val aw = new AW() // write address channel - val w = new W() // write data channel - val b = new B() // write response channel + val ar = Decoupled(new AR()) // read address channel + val r = Flipped(Decoupled(new R())) // read data channel + val aw = Decoupled(new AW()) // write address channel + val w = Decoupled(new W()) // write data channel + val b = Flipped(Decoupled(new B())) // write response channel } class DEBUG extends Bundle { diff --git a/chisel/playground/src/defines/Const.scala b/chisel/playground/src/defines/Const.scala index 3dd09bd..7ab27e8 100644 --- a/chisel/playground/src/defines/Const.scala +++ b/chisel/playground/src/defines/Const.scala @@ -20,6 +20,7 @@ trait Constants extends CoreParameter { val EXC_WID = 16 // inst rom + val INST_BANK_WID = 32 val INST_WID = XLEN val INST_ADDR_WID = XLEN diff --git a/chisel/playground/src/defines/TlbBundles.scala b/chisel/playground/src/defines/TlbBundles.scala new file mode 100644 index 0000000..7a959cf --- /dev/null +++ b/chisel/playground/src/defines/TlbBundles.scala @@ -0,0 +1,124 @@ +package cpu.defines + +import chisel3._ +import chisel3.util._ + +sealed trait Sv39Const extends CoreParameter { + val PAddrBits = PADDR_WID + val Level = 3 + val offLen = 12 + val ppn0Len = 9 + val ppn1Len = 9 + val ppn2Len = PAddrBits - offLen - ppn0Len - ppn1Len // 2 + val ppnLen = ppn2Len + ppn1Len + ppn0Len + val vpn2Len = 9 + val vpn1Len = 9 + val vpn0Len = 9 + val vpnLen = vpn2Len + vpn1Len + vpn0Len + + //val paddrLen = PAddrBits + //val vaddrLen = VAddrBits + val satpLen = XLEN + val satpModeLen = 4 + val asidLen = 16 + val flagLen = 8 + + val ptEntryLen = XLEN + val satpResLen = XLEN - ppnLen - satpModeLen - asidLen + //val vaResLen = 25 // unused + //val paResLen = 25 // unused + val pteResLen = XLEN - ppnLen - 2 - flagLen + + def vaBundle = new Bundle { + val vpn2 = UInt(vpn2Len.W) + val vpn1 = UInt(vpn1Len.W) + val vpn0 = UInt(vpn0Len.W) + val off = UInt(offLen.W) + } + + def vaBundle2 = new Bundle { + val vpn = UInt(vpnLen.W) + val off = UInt(offLen.W) + } + + def vaBundle3 = new Bundle { + val vpn = UInt(vpnLen.W) + val off = UInt(offLen.W) + } + + def vpnBundle = new Bundle { + val vpn2 = UInt(vpn2Len.W) + val vpn1 = UInt(vpn1Len.W) + val vpn0 = UInt(vpn0Len.W) + } + + def paBundle = new Bundle { + val ppn2 = UInt(ppn2Len.W) + val ppn1 = UInt(ppn1Len.W) + val ppn0 = UInt(ppn0Len.W) + val off = UInt(offLen.W) + } + + def paBundle2 = new Bundle { + val ppn = UInt(ppnLen.W) + val off = UInt(offLen.W) + } + + def paddrApply(ppn: UInt, vpnn: UInt): UInt = { + Cat(Cat(ppn, vpnn), 0.U(3.W)) + } + + def pteBundle = new Bundle { + val reserved = UInt(pteResLen.W) + val ppn = UInt(ppnLen.W) + val rsw = UInt(2.W) + val flag = new Bundle { + val d = UInt(1.W) + val a = UInt(1.W) + val g = UInt(1.W) + val u = UInt(1.W) + val x = UInt(1.W) + val w = UInt(1.W) + val r = UInt(1.W) + val v = UInt(1.W) + } + } + + def satpBundle = new Bundle { + val mode = UInt(satpModeLen.W) + val asid = UInt(asidLen.W) + val res = UInt(satpResLen.W) + val ppn = UInt(ppnLen.W) + } + + def flagBundle = new Bundle { + val d = Bool() + val a = Bool() + val g = Bool() + val u = Bool() + val x = Bool() + val w = Bool() + val r = Bool() + val v = Bool() + } + + def maskPaddr(ppn: UInt, vaddr: UInt, mask: UInt) = { + MaskData(vaddr, Cat(ppn, 0.U(offLen.W)), Cat(Fill(ppn2Len, 1.U(1.W)), mask, 0.U(offLen.W))) + } + + def MaskEQ(mask: UInt, pattern: UInt, vpn: UInt) = { + (Cat("h1ff".U(vpn2Len.W), mask) & pattern) === (Cat("h1ff".U(vpn2Len.W), mask) & vpn) + } + +} + +class Tlb_ICache extends Bundle { + val fill = Input(Bool()) + val icache_is_save = Input(Bool()) + val uncached = Output(Bool()) + + val translation_ok = Output(Bool()) + val hit = Output(Bool()) + val tag = Output(UInt(20.W)) + val pa = Output(UInt(32.W)) +}