From e646ee4a4c8d3cd48bca3d0fc912bc4a4376c585 Mon Sep 17 00:00:00 2001 From: Liphen Date: Sun, 24 Dec 2023 16:21:53 +0800 Subject: [PATCH] =?UTF-8?q?docs(cache):=20=E5=A2=9E=E5=8A=A0=E6=B3=A8?= =?UTF-8?q?=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- chisel/playground/src/cache/DCache.scala | 219 ++++++++++++++--------- chisel/playground/src/cache/ICache.scala | 127 +++++++------ 2 files changed, 205 insertions(+), 141 deletions(-) diff --git a/chisel/playground/src/cache/DCache.scala b/chisel/playground/src/cache/DCache.scala index e86bffd..601f451 100644 --- a/chisel/playground/src/cache/DCache.scala +++ b/chisel/playground/src/cache/DCache.scala @@ -8,6 +8,41 @@ import cpu.defines._ import cpu.CpuConfig import cpu.defines.Const._ +/* + 整个宽度为PADDR_WID的地址 + ========================================================== + | tag | index | offset | + | | | bank index | bank offset | + ========================================================== + + nway 组,nindex 行 + ============================================================== + | valid | dirty | tag | bank 0 | bank 1 | ... | bank n | + | 1 | 1 | | | | | | + ============================================================== + | bank | + | data 0 | data 1 | ... | data n | + | XLEN | XLEN | ... | XLEN | + ===================================== + + 本 CPU 的实现如下: + 每个bank分为多个dataBlocks,每个dataBlocks的宽度为AXI_DATA_WID,这样能方便的和AXI总线进行交互 + RV64实现中AXI_DATA_WID为64,所以每个dataBlocks可以存储1个数据 + 为了简化设计,目前一个bank中只有一个dataBlocks,即每个bank中只能存储一个数据 + 这样的话dataBlocks可以被简化掉,直接用bank代替 + //TODO:解决AXI_DATA_WID小于XLEN的情况 + + ============================================================== + | valid | dirty | tag | bank 0 | bank 1 | ... | bank n | + | 1 | 1 | | | | | | + ============================================================== + | bank | + | dataBlocks | + | data 0 | + | 64 | + =================== + */ + class WriteBufferUnit extends Bundle { val data = UInt(XLEN.W) val addr = UInt(DATA_ADDR_WID.W) @@ -16,36 +51,46 @@ class WriteBufferUnit extends Bundle { } class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Module { - val nway: Int = cacheConfig.nway - val nindex: Int = cacheConfig.nindex - val nbank: Int = cacheConfig.nbank - val bitsPerBank: Int = cacheConfig.bitsPerBank - val tagWidth: Int = cacheConfig.tagWidth - val burstSize: Int = 16 + val nway: Int = cacheConfig.nway + val nindex: Int = cacheConfig.nindex + val nbank: Int = cacheConfig.nbank + val tagWidth: Int = cacheConfig.tagWidth + val burstSize: Int = 16 + val writeFifoDepth: Int = 4 + + // 每个bank中存AXI_DATA_WID位的数据 + // TODO:目前的实现只保证了AXI_DATA_WID为XLEN的情况下的正确性 + require(AXI_DATA_WID == XLEN, "AXI_DATA_WID should be greater than XLEN") val io = IO(new Bundle { val cpu = Flipped(new Cache_DCache()) val axi = new DCache_AXIInterface() }) - val tlb_fill = RegInit(false.B) // * fsm * // val s_idle :: s_uncached :: s_writeback :: s_replace :: s_save :: Nil = Enum(5) val state = RegInit(s_idle) - io.cpu.tlb.fill := tlb_fill - io.cpu.tlb.dcache_is_idle := state === s_idle + val tlb_fill = RegInit(false.B) + io.cpu.tlb.fill := tlb_fill + + // 每个bank中只有一个dataBlocks + val dataBlocksPerBank = 1 + // axi信号中size的宽度,对于cached段,size为3位 + val cached_size = log2Ceil(AXI_DATA_WID / 8) + val cached_len = (nbank * dataBlocksPerBank - 1) // * valid dirty * // + // 每行有一个有效位和一个脏位 val valid = RegInit(VecInit(Seq.fill(nindex)(VecInit(Seq.fill(nway)(false.B))))) val dirty = RegInit(VecInit(Seq.fill(nindex)(VecInit(Seq.fill(nway)(false.B))))) - val lru = RegInit(VecInit(Seq.fill(nindex)(0.U(1.W)))) + val lru = RegInit(VecInit(Seq.fill(nindex)(false.B))) // TODO:支持更多路数,目前只支持2路 - val write_fifo = Module(new Queue(new WriteBufferUnit(), 4)) + val writeFifo = Module(new Queue(new WriteBufferUnit(), writeFifoDepth)) - write_fifo.io.enq.valid := false.B - write_fifo.io.enq.bits := 0.U.asTypeOf(new WriteBufferUnit()) - write_fifo.io.deq.ready := false.B + writeFifo.io.enq.valid := false.B + writeFifo.io.enq.bits := 0.U.asTypeOf(new WriteBufferUnit()) + writeFifo.io.deq.ready := false.B val axi_cnt = Counter(burstSize) val read_ready_cnt = RegInit(0.U(4.W)) @@ -54,14 +99,14 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul // * victim cache * // val victim = RegInit(0.U.asTypeOf(new Bundle { val valid = Bool() - val set = UInt(6.W) + val index = UInt(6.W) val waddr = UInt(10.W) val wstrb = Vec(nway, UInt(AXI_STRB_WID.W)) val working = Bool() val writeback = Bool() })) val victim_cnt = Counter(burstSize) - val victim_addr = Cat(victim.set, victim_cnt.value) + val victim_addr = Cat(victim.index, victim_cnt.value) val fset = io.cpu.addr(11, 6) val fence = RegInit(0.U.asTypeOf(new Bundle { @@ -77,7 +122,7 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul val data_waddr = Mux(victim.valid, victim.waddr, io.cpu.addr(11, 2)) val data_wdata = Mux(state === s_replace, io.axi.r.bits.data, io.cpu.wdata) - val tag_raddr = Mux(victim.valid, victim.set, io.cpu.addr(11, 6)) + val tag_raddr = Mux(victim.valid, victim.index, io.cpu.addr(11, 6)) val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B))) val tag_wdata = RegInit(0.U(tagWidth.W)) @@ -88,13 +133,13 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul val cache_hit = tag_compare_valid.contains(true.B) val mmio_read_stall = io.cpu.tlb.uncached && !io.cpu.wen.orR - val mmio_write_stall = io.cpu.tlb.uncached && io.cpu.wen.orR && !write_fifo.io.enq.ready + val mmio_write_stall = io.cpu.tlb.uncached && io.cpu.wen.orR && !writeFifo.io.enq.ready val cached_stall = !io.cpu.tlb.uncached && !cache_hit val sel = tag_compare_valid(1) - // * physical set * // - val pset = io.cpu.addr(11, 6) + // * physical index * // + val physical_index = io.cpu.addr(11, 6) val dcache_stall = Mux( state === s_idle && !tlb_fill, @@ -115,25 +160,25 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul // bank tagv ram for { i <- 0 until nway } { - val bank_ram = Module(new SimpleDualPortRam(nindex * nbank, bitsPerBank, byteAddressable = true)) - bank_ram.io.ren := true.B - bank_ram.io.raddr := data_raddr - data(i) := bank_ram.io.rdata + val bank = Module(new SimpleDualPortRam(nindex * nbank, AXI_DATA_WID, byteAddressable = true)) + bank.io.ren := true.B + bank.io.raddr := data_raddr + data(i) := bank.io.rdata - bank_ram.io.wen := data_wstrb(i).orR - bank_ram.io.waddr := data_waddr - bank_ram.io.wdata := data_wdata - bank_ram.io.wstrb := data_wstrb(i) + bank.io.wen := data_wstrb(i).orR + bank.io.waddr := data_waddr + bank.io.wdata := data_wdata + bank.io.wstrb := data_wstrb(i) - val tag_ram = Module(new LUTRam(nindex, tagWidth)) - tag_ram.io.raddr := tag_raddr - tag(i) := tag_ram.io.rdata + val tagRam = Module(new LUTRam(nindex, tagWidth)) + tagRam.io.raddr := tag_raddr + tag(i) := tagRam.io.rdata - tag_ram.io.wen := tag_wstrb(i) - tag_ram.io.waddr := victim.set - tag_ram.io.wdata := tag_wdata + tagRam.io.wen := tag_wstrb(i) + tagRam.io.waddr := victim.index + tagRam.io.wdata := tag_wdata - tag_compare_valid(i) := tag(i) === io.cpu.tlb.tag && valid(pset)(i) && io.cpu.tlb.translation_ok + tag_compare_valid(i) := tag(i) === io.cpu.tlb.tag && valid(physical_index)(i) && io.cpu.tlb.translation_ok cache_data_forward(i) := Mux( last_waddr === io.cpu.addr(11, 2), ((last_wstrb(i) & last_wdata) | (data(i) & (~last_wstrb(i)))), @@ -188,13 +233,13 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul when(io.axi.b.fire) { write_buffer_axi_busy := false.B } - }.elsewhen(write_fifo.io.deq.valid) { - write_fifo.io.deq.ready := write_fifo.io.deq.valid - when(write_fifo.io.deq.fire) { - aw.addr := write_fifo.io.deq.bits.addr - aw.size := write_fifo.io.deq.bits.size - w.data := write_fifo.io.deq.bits.data - w.strb := write_fifo.io.deq.bits.strb + }.elsewhen(writeFifo.io.deq.valid) { + writeFifo.io.deq.ready := writeFifo.io.deq.valid + when(writeFifo.io.deq.fire) { + aw.addr := writeFifo.io.deq.bits.addr + aw.size := writeFifo.io.deq.bits.size + w.data := writeFifo.io.deq.bits.data + w.strb := writeFifo.io.deq.bits.strb } aw.len := 0.U awvalid := true.B @@ -221,23 +266,23 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul } }.elsewhen(io.cpu.tlb.uncached) { when(io.cpu.wen.orR) { - when(write_fifo.io.enq.ready && !current_mmio_write_saved) { - write_fifo.io.enq.valid := true.B - write_fifo.io.enq.bits.addr := Mux( + when(writeFifo.io.enq.ready && !current_mmio_write_saved) { + writeFifo.io.enq.valid := true.B + writeFifo.io.enq.bits.addr := Mux( io.cpu.rlen === 2.U, Cat(io.cpu.tlb.pa(31, 2), 0.U(2.W)), io.cpu.tlb.pa ) - write_fifo.io.enq.bits.size := io.cpu.rlen - write_fifo.io.enq.bits.strb := io.cpu.wstrb - write_fifo.io.enq.bits.data := io.cpu.wdata + writeFifo.io.enq.bits.size := io.cpu.rlen + writeFifo.io.enq.bits.strb := io.cpu.wstrb + writeFifo.io.enq.bits.data := io.cpu.wdata current_mmio_write_saved := true.B } when(io.cpu.dcache_ready && io.cpu.cpu_ready) { current_mmio_write_saved := false.B } - }.elsewhen(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) { + }.elsewhen(!(writeFifo.io.deq.valid || write_buffer_axi_busy)) { ar.addr := Mux(io.cpu.rlen === 2.U, Cat(io.cpu.tlb.pa(31, 2), 0.U(2.W)), io.cpu.tlb.pa) ar.len := 0.U ar.size := io.cpu.rlen @@ -249,19 +294,19 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul when(!cache_hit) { state := s_replace axi_cnt.reset() - victim.set := pset + victim.index := physical_index victim_cnt.reset() - read_ready_set := pset + read_ready_set := physical_index read_ready_cnt := 0.U - victim.waddr := Cat(pset, 0.U(4.W)) + victim.waddr := Cat(physical_index, 0.U(4.W)) victim.valid := true.B - victim.writeback := dirty(pset)(lru(pset)) + victim.writeback := dirty(physical_index)(lru(physical_index)) }.otherwise { when(io.cpu.dcache_ready) { // update lru and mark dirty - lru(pset) := ~sel + lru(physical_index) := ~sel when(io.cpu.wen.orR) { - dirty(pset)(sel) := true.B + dirty(physical_index)(sel) := true.B } when(!io.cpu.cpu_ready) { saved_rdata := cache_data_forward(sel) @@ -272,10 +317,10 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul } }.elsewhen(io.cpu.fence) { when(dirty(fset).contains(true.B)) { - when(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) { + when(!(writeFifo.io.deq.valid || write_buffer_axi_busy)) { state := s_writeback axi_cnt.reset() - victim.set := fset + victim.index := fset victim_cnt.reset() read_ready_set := fset read_ready_cnt := 0.U @@ -305,16 +350,16 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul when(victim_cnt.value =/= (burstSize - 1).U) { victim_cnt.inc() } - read_ready_set := victim.set + read_ready_set := victim.index read_ready_cnt := victim_cnt.value read_buffer(read_ready_cnt) := data(dirty(fset)(1)) when(!aw_handshake) { aw.addr := Cat(tag(dirty(fset)(1)), fset, 0.U(6.W)) - aw.len := 15.U - aw.size := "b011".U // 8 字节 + aw.len := cached_len.U + aw.size := cached_size.U awvalid := true.B w.data := data(dirty(fset)(1)) - w.strb := 15.U + w.strb := ~0.U(AXI_STRB_WID.W) w.last := false.B wvalid := true.B aw_handshake := true.B @@ -351,23 +396,23 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul } } is(s_replace) { - when(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) { + when(!(writeFifo.io.deq.valid || write_buffer_axi_busy)) { when(victim.working) { when(victim.writeback) { when(victim_cnt.value =/= (burstSize - 1).U) { victim_cnt.inc() } - read_ready_set := victim.set + read_ready_set := victim.index read_ready_cnt := victim_cnt.value - read_buffer(read_ready_cnt) := data(lru(pset)) + read_buffer(read_ready_cnt) := data(lru(physical_index)) when(!aw_handshake) { - aw.addr := Cat(tag(lru(pset)), pset, 0.U(6.W)) - aw.len := 15.U - aw.size := "b011".U // 8 字节 + aw.addr := Cat(tag(lru(physical_index)), physical_index, 0.U(6.W)) + aw.len := cached_len.U + aw.size := cached_size.U awvalid := true.B aw_handshake := true.B - w.data := data(lru(pset)) - w.strb := 15.U + w.data := data(lru(physical_index)) + w.strb := ~0.U(AXI_STRB_WID.W) w.last := false.B wvalid := true.B } @@ -380,7 +425,7 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul }.otherwise { w.data := Mux( ((axi_cnt.value + 1.U) === read_ready_cnt), - data(lru(pset)), + data(lru(physical_index)), read_buffer(axi_cnt.value + 1.U) ) axi_cnt.inc() @@ -390,29 +435,29 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul } } when(io.axi.b.valid) { - dirty(pset)(lru(pset)) := false.B - victim.writeback := false.B + dirty(physical_index)(lru(physical_index)) := false.B + victim.writeback := false.B } } when(!ar_handshake) { - ar.addr := Cat(io.cpu.tlb.pa(31, 6), 0.U(6.W)) - ar.len := 15.U - ar.size := "b011".U // 8 字节 - arvalid := true.B - rready := true.B - ar_handshake := true.B - victim.wstrb(lru(pset)) := "hff".U - tag_wstrb(lru(pset)) := true.B - tag_wdata := io.cpu.tlb.pa(31, 12) + ar.addr := Cat(io.cpu.tlb.pa(31, 6), 0.U(6.W)) + ar.len := cached_len.U + ar.size := cached_size.U // 8 字节 + arvalid := true.B + rready := true.B + ar_handshake := true.B + victim.wstrb(lru(physical_index)) := ~0.U(AXI_STRB_WID.W) + tag_wstrb(lru(physical_index)) := true.B + tag_wdata := io.cpu.tlb.pa(31, 12) } when(io.axi.ar.fire) { - tag_wstrb(lru(pset)) := false.B - arvalid := false.B + tag_wstrb(lru(physical_index)) := false.B + arvalid := false.B } when(io.axi.r.fire) { when(io.axi.r.bits.last) { - rready := false.B - victim.wstrb(lru(pset)) := 0.U + rready := false.B + victim.wstrb(lru(physical_index)) := 0.U }.otherwise { victim.waddr := victim.waddr + 1.U } @@ -420,8 +465,8 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul when( (!victim.writeback || io.axi.b.valid) && ((ar_handshake && io.axi.r.valid && io.axi.r.bits.last) || (ar_handshake && !rready)) ) { - victim.valid := false.B - valid(pset)(lru(pset)) := true.B + victim.valid := false.B + valid(physical_index)(lru(physical_index)) := true.B } when(!victim.valid) { victim.working := false.B diff --git a/chisel/playground/src/cache/ICache.scala b/chisel/playground/src/cache/ICache.scala index f05d2b0..90e0479 100644 --- a/chisel/playground/src/cache/ICache.scala +++ b/chisel/playground/src/cache/ICache.scala @@ -8,18 +8,55 @@ import cpu.defines._ import cpu.CpuConfig import cpu.defines.Const._ +/* + 整个宽度为PADDR_WID的地址 + ========================================================== + | tag | index | offset | + | | | bank index | bank offset | + ========================================================== + + nway 组,nindex 行 + ====================================================== + | valid | tag | bank 0 | bank 1 | ... | bank n | + | 1 | | | | | | + ====================================================== + | bank | + | inst 0 | inst 1 | ... | inst n | + | 32 | 32 | ... | 32 | + ===================================== + + 本CPU的实现如下: + 每个bank分为多个instBlocks,每个instBlocks的宽度为AXI_DATA_WID,这样能方便的和AXI总线进行交互 + RV64实现中AXI_DATA_WID为64,所以每个instBlocks可以存储2条指令 + 而instBlocks的个数会和instFetchNum相关 + - 当instFetchNum为4时,instBlocks的个数为2 + - 当instFetchNum为2时,instBlocks的个数为1 + 读取数据时会将一个bank中的所有instBlocks读取出来,然后再将instBlocks中的数据按照偏移量重新排列 + 这样的设计可以保证一个bank的指令数对应instFetchNum + + ====================================================== + | valid | tag | bank 0 | bank 1 | ... | bank n | + | 1 | | | | | | + ====================================================== + | bank | + | instBlocks | instBlocks | + | inst 0 | inst 1 | inst 0 | inst 1 | + | 32 | 32 | 32 | 32 | + ===================================== + */ + class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Module { - val nway: Int = cacheConfig.nway - val nindex: Int = cacheConfig.nindex - val nbank: Int = cacheConfig.nbank - val instFetchNum: Int = config.instFetchNum - val bankOffsetWidth: Int = cacheConfig.bankOffsetWidth - val bankIndexWidth: Int = cacheConfig.offsetWidth - bankOffsetWidth - val bytesPerBank: Int = cacheConfig.bytesPerBank - val tagWidth: Int = cacheConfig.tagWidth - val indexWidth: Int = cacheConfig.indexWidth - val offsetWidth: Int = cacheConfig.offsetWidth - val bitsPerBank: Int = cacheConfig.bitsPerBank + val nway = cacheConfig.nway + val nindex = cacheConfig.nindex + val nbank = cacheConfig.nbank + val instFetchNum = config.instFetchNum + val bankOffsetWidth = cacheConfig.bankOffsetWidth + val bankIndexWidth = cacheConfig.offsetWidth - bankOffsetWidth + val bytesPerBank = cacheConfig.bytesPerBank + val tagWidth = cacheConfig.tagWidth + val indexWidth = cacheConfig.indexWidth + val offsetWidth = cacheConfig.offsetWidth + val bitsPerBank = cacheConfig.bitsPerBank val io = IO(new Bundle { val cpu = Flipped(new Cache_ICache()) val axi = new ICache_AXIInterface() @@ -31,12 +68,6 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul "bitsPerBank must be greater than AXI_DATA_WID" ) - // 整个宽度为PADDR_WID的地址 - // ========================================================== - // | tag | index | offset | - // | | | bank index | bank offset | - // ========================================================== - // 一个bank是bitsPerBank宽度,一个bank中有instFetchNum个指令 // 每个bank中指令块的个数,一个指令块是AXI_DATA_WID宽度 val instBlocksPerBank = bitsPerBank / AXI_DATA_WID @@ -49,17 +80,6 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul val s_idle :: s_uncached :: s_replace :: s_save :: Nil = Enum(4) val state = RegInit(s_idle) - // * nway * nindex * // - // * 128 bit for 4 inst * // - // ========================================================= - // | valid | tag | bank 0 | bank 1 | bank 2 | bank 3 | - // | 1 | 20 | 128 | 128 | 128 | 128 | - // ========================================================= - // | bank | - // | inst 0 | inst 1 | inst 2 | inst 3 | - // | 32 | 32 | 32 | 32 | - // ===================================== - // nway 路,每路 nindex 行,每行 nbank 个 bank,每行的nbank共用一个valid val valid = RegInit(VecInit(Seq.fill(nway)(VecInit(Seq.fill(nindex)(false.B))))) @@ -97,7 +117,7 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul // * replace index * // val replace_index = RegInit(0.U(indexWidth.W)) // 用于控制写入一行cache条目中的哪个bank, 一个bank可能有多次写入 - val repalce_wstrb = RegInit( + val replace_wstrb = RegInit( VecInit(Seq.fill(nway)(VecInit(Seq.fill(nbank)(VecInit(Seq.fill(instBlocksPerBank)((false.B))))))) ) @@ -107,9 +127,6 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul val cache_hit_available = cache_hit && io.cpu.tlb.translation_ok && !io.cpu.tlb.uncached val select_way = tag_compare_valid(1) // 1路命中时值为1,0路命中时值为0 //TODO:支持更多路数 - // | bank | - // | inst 0 | inst 1 | - // | 32 | 32 | // 将一个 bank 中的指令分成 instFetchNum 份,每份 INST_WID bit val inst_in_bank = VecInit( Seq.tabulate(instFetchNum)(i => data(select_way)(bank_index).asUInt((i + 1) * INST_WID - 1, i * INST_WID)) @@ -138,9 +155,11 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul })))) // 对于可缓存段访存时读取的数据宽度应该和AXI_DATA的宽度相同 - val cached_rsize = log2Ceil(AXI_DATA_WID / 8) + val cached_size = log2Ceil(AXI_DATA_WID / 8) + val cached_len = (nbank * instBlocksPerBank - 1) // 对于不可缓存段访存时读取的数据宽度应该和指令宽度相同 - val uncached_rsize = log2Ceil(INST_WID / 8) + val uncached_size = log2Ceil(INST_WID / 8) + val uncached_len = 0 // bank tag ram for { i <- 0 until nway } { @@ -158,10 +177,10 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul bank(j)(k).io.raddr := data_rindex data(i)(j)(k) := bank(j)(k).io.rdata - bank(j)(k).io.wen := repalce_wstrb(i)(j)(k) + bank(j)(k).io.wen := replace_wstrb(i)(j)(k) bank(j)(k).io.waddr := replace_index bank(j)(k).io.wdata := io.axi.r.bits.data - bank(j)(k).io.wstrb := repalce_wstrb(i)(j)(k) + bank(j)(k).io.wstrb := replace_wstrb(i)(j)(k) } } } @@ -198,7 +217,7 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul val addr_err = io.cpu.addr(should_next_addr)(XLEN - 1, PADDR_WID).orR when(acc_err) { acc_err := false.B } - io.cpu.acc_err := acc_err + io.cpu.acc_err := acc_err //TODO:实现cached段中的访存错误 switch(state) { is(s_idle) { @@ -219,20 +238,20 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul }.elsewhen(io.cpu.tlb.uncached) { state := s_uncached ar.addr := io.cpu.tlb.pa - ar.len := 0.U - ar.size := uncached_rsize.U + ar.len := uncached_len.U + ar.size := uncached_size.U arvalid := true.B }.elsewhen(!cache_hit) { state := s_replace // 取指时按bank块取指 ar.addr := Cat(io.cpu.tlb.pa(PADDR_WID - 1, offsetWidth), 0.U(offsetWidth.W)) - ar.len := (nbank * instBlocksPerBank - 1).U - ar.size := cached_rsize.U + ar.len := cached_len.U + ar.size := cached_size.U arvalid := true.B replace_index := virtual_index - repalce_wstrb(replace_way).map(_.map(_ := false.B)) - repalce_wstrb(replace_way)(0)(0) := true.B // 从第一个bank的第一个指令块开始写入 + replace_wstrb(replace_way).map(_.map(_ := false.B)) + replace_wstrb(replace_way)(0)(0) := true.B // 从第一个bank的第一个指令块开始写入 tag_wstrb(replace_way) := true.B tag_wdata := io.cpu.tlb.tag valid(replace_way)(virtual_index) := true.B @@ -271,11 +290,11 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul // * burst transport * // when(!io.axi.r.bits.last) { // 左移写掩码,写入下一个bank,或是同一个bank的下一个指令 - repalce_wstrb(replace_way) := - ((repalce_wstrb(replace_way).asUInt << 1)).asTypeOf(repalce_wstrb(replace_way)) + replace_wstrb(replace_way) := + ((replace_wstrb(replace_way).asUInt << 1)).asTypeOf(replace_wstrb(replace_way)) }.otherwise { rready := false.B - repalce_wstrb(replace_way).map(_.map(_ := false.B)) + replace_wstrb(replace_way).map(_.map(_ := false.B)) tag_wstrb(replace_way) := false.B } }.elsewhen(!io.axi.r.ready) { @@ -290,12 +309,12 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul } } - println("ICache: ") - println("nindex: " + nindex) - println("nbank: " + nbank) - println("bankOffsetWidth: " + bankOffsetWidth) - println("bytesPerBank: " + bytesPerBank) - println("tagWidth: " + tagWidth) - println("indexWidth: " + indexWidth) - println("offsetWidth: " + offsetWidth) + // println("ICache: ") + // println("nindex: " + nindex) + // println("nbank: " + nbank) + // println("bankOffsetWidth: " + bankOffsetWidth) + // println("bytesPerBank: " + bytesPerBank) + // println("tagWidth: " + tagWidth) + // println("indexWidth: " + indexWidth) + // println("offsetWidth: " + offsetWidth) }