From 442f51d5a46884821d3176e176e90c53279da34d Mon Sep 17 00:00:00 2001
From: Liphen <xi.lifeng@qq.com>
Date: Sat, 23 Dec 2023 11:47:35 +0800
Subject: [PATCH] =?UTF-8?q?fix(icache):=20=E4=BF=AE=E5=A4=8D=E4=B9=8B?=
 =?UTF-8?q?=E5=89=8Dicache=E9=81=97=E7=95=99=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 chisel/playground/src/CpuConfig.scala         |  15 +-
 chisel/playground/src/cache/Cache.scala       |   4 +-
 chisel/playground/src/cache/DCache.scala      |  22 +--
 chisel/playground/src/cache/ICache.scala      | 174 +++++++++++-------
 .../src/cache/memory/PortDefinitions.scala    |   4 +-
 .../src/cache/memory/SimpleDualPortRam.scala  |  25 ++-
 chisel/playground/src/defines/Const.scala     |   2 +-
 7 files changed, 142 insertions(+), 104 deletions(-)

diff --git a/chisel/playground/src/CpuConfig.scala b/chisel/playground/src/CpuConfig.scala
index c430d51..8a3eec3 100644
--- a/chisel/playground/src/CpuConfig.scala
+++ b/chisel/playground/src/CpuConfig.scala
@@ -28,23 +28,22 @@ case class BranchPredictorConfig(
   val phtDepth: Int = 6)
 
 case class CacheConfig(
-  nway:      Int = 2, // 路数
-  nbank:     Int = 8, // 每个项目中的bank数
-  nindex:    Int, // 每路的项目数
-  bankWidth: Int // 每个bank中的字节数
+  nway:         Int = 2, // 路数
+  nbank:        Int, // 每个项目中的bank数
+  nindex:       Int, // 每路的项目数
+  bytesPerBank: Int // 每个bank中的字节数
 ) {
   val config          = CpuConfig()
   val indexWidth      = log2Ceil(nindex) // index的位宽
   val bankIndexWidth  = log2Ceil(nbank)
-  val bankOffsetWidth = log2Ceil(bankWidth)
+  val bankOffsetWidth = log2Ceil(bytesPerBank)
   val offsetWidth     = bankIndexWidth + bankOffsetWidth // offset的位宽
   val tagWidth        = 32 - indexWidth - offsetWidth // tag的位宽
-  val bankWidthBits   = bankWidth * 8
-  val burstSize       = 16
+  val bitsPerBank     = bytesPerBank * 8
   require(isPow2(nindex))
   require(isPow2(nway))
   require(isPow2(nbank))
-  require(isPow2(bankWidth))
+  require(isPow2(bytesPerBank))
   require(
     tagWidth + indexWidth + bankIndexWidth + bankOffsetWidth == 32,
     "basic request calculation"
diff --git a/chisel/playground/src/cache/Cache.scala b/chisel/playground/src/cache/Cache.scala
index c50a199..76e6d96 100644
--- a/chisel/playground/src/cache/Cache.scala
+++ b/chisel/playground/src/cache/Cache.scala
@@ -15,9 +15,9 @@ class Cache(implicit config: CpuConfig) extends Module {
   })
 
   implicit val iCacheConfig =
-    CacheConfig(nindex = 64, nbank = 4, bankWidth = (32 / 8) * 4) // 每个 bank 存 4 条 32 bit 指令
+    CacheConfig(nindex = 64, nbank = 4, bytesPerBank = (32 / 8) * config.instFetchNum) // each bank holds config.instFetchNum 32-bit instructions
   implicit val dCacheConfig =
-    CacheConfig(nindex = 128, bankWidth = XLEN / 8) // 每个 bank 存 1 条 XLEN bit 数据
+    CacheConfig(nindex = 128, nbank = 8, bytesPerBank = XLEN / 8) // 每个 bank 存 1 条 XLEN bit 数据
 
   val icache        = Module(new ICache(iCacheConfig))
   val dcache        = Module(new DCache(dCacheConfig))
diff --git a/chisel/playground/src/cache/DCache.scala b/chisel/playground/src/cache/DCache.scala
index f683904..e86bffd 100644
--- a/chisel/playground/src/cache/DCache.scala
+++ b/chisel/playground/src/cache/DCache.scala
@@ -16,12 +16,12 @@ class WriteBufferUnit extends Bundle {
 }
 
 class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Module {
-  val nway:          Int = cacheConfig.nway
-  val nset:          Int = cacheConfig.nindex
-  val nbank:         Int = cacheConfig.nbank
-  val bankWidthBits: Int = cacheConfig.bankWidthBits
-  val tagWidth:      Int = cacheConfig.tagWidth
-  val burstSize:     Int = cacheConfig.burstSize
+  val nway:        Int = cacheConfig.nway
+  val nindex:      Int = cacheConfig.nindex
+  val nbank:       Int = cacheConfig.nbank
+  val bitsPerBank: Int = cacheConfig.bitsPerBank
+  val tagWidth:    Int = cacheConfig.tagWidth
+  val burstSize:   Int = 16
 
   val io = IO(new Bundle {
     val cpu = Flipped(new Cache_DCache())
@@ -37,9 +37,9 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
   io.cpu.tlb.dcache_is_idle := state === s_idle
 
   // * valid dirty * //
-  val valid = RegInit(VecInit(Seq.fill(nset)(VecInit(Seq.fill(nway)(false.B)))))
-  val dirty = RegInit(VecInit(Seq.fill(nset)(VecInit(Seq.fill(nway)(false.B)))))
-  val lru   = RegInit(VecInit(Seq.fill(nset)(0.U(1.W))))
+  val valid = RegInit(VecInit(Seq.fill(nindex)(VecInit(Seq.fill(nway)(false.B)))))
+  val dirty = RegInit(VecInit(Seq.fill(nindex)(VecInit(Seq.fill(nway)(false.B)))))
+  val lru   = RegInit(VecInit(Seq.fill(nindex)(0.U(1.W))))
 
   val write_fifo = Module(new Queue(new WriteBufferUnit(), 4))
 
@@ -115,7 +115,7 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
 
   // bank tagv ram
   for { i <- 0 until nway } {
-    val bank_ram = Module(new SimpleDualPortRam(nset * nbank, bankWidthBits, byteAddressable = true))
+    val bank_ram = Module(new SimpleDualPortRam(nindex * nbank, bitsPerBank, byteAddressable = true))
     bank_ram.io.ren   := true.B
     bank_ram.io.raddr := data_raddr
     data(i)           := bank_ram.io.rdata
@@ -125,7 +125,7 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
     bank_ram.io.wdata := data_wdata
     bank_ram.io.wstrb := data_wstrb(i)
 
-    val tag_ram = Module(new LUTRam(nset, tagWidth))
+    val tag_ram = Module(new LUTRam(nindex, tagWidth))
     tag_ram.io.raddr := tag_raddr
     tag(i)           := tag_ram.io.rdata
 
diff --git a/chisel/playground/src/cache/ICache.scala b/chisel/playground/src/cache/ICache.scala
index 5739d06..da24e57 100644
--- a/chisel/playground/src/cache/ICache.scala
+++ b/chisel/playground/src/cache/ICache.scala
@@ -10,35 +10,37 @@ import cpu.defines.Const._
 
 class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Module {
   val nway:            Int = cacheConfig.nway
-  val nset:            Int = cacheConfig.nindex
+  val nindex:          Int = cacheConfig.nindex
   val nbank:           Int = cacheConfig.nbank
-  val ninst:           Int = config.instFetchNum
+  val instFetchNum:    Int = config.instFetchNum
   val bankOffsetWidth: Int = cacheConfig.bankOffsetWidth
-  val bankWidth:       Int = cacheConfig.bankWidth
+  val bankIndexWidth:  Int = cacheConfig.offsetWidth - bankOffsetWidth
+  val bytesPerBank:    Int = cacheConfig.bytesPerBank
   val tagWidth:        Int = cacheConfig.tagWidth
   val indexWidth:      Int = cacheConfig.indexWidth
   val offsetWidth:     Int = cacheConfig.offsetWidth
+  val bitsPerBank:     Int = cacheConfig.bitsPerBank
   val io = IO(new Bundle {
     val cpu = Flipped(new Cache_ICache())
     val axi = new ICache_AXIInterface()
   })
-  require(isPow2(ninst), "ninst must be power of 2")
-  // * addr organization * //
-  // ======================================
-  // |        tag         |  index |offset|
-  // |31                12|11     6|5    0|
-  // ======================================
-  // |         offset           |
-  // | bank index | bank offset |
-  // | 5        4 | 3         2 |
-  // ============================
+  require(isPow2(instFetchNum), "ninst must be power of 2")
+
+  // Layout of the full PADDR_WID-bit address
+  // ==========================================================
+  // |        tag         |  index |         offset           |
+  // |                    |        | bank index | bank offset |
+  // ==========================================================
+
+  val bank_index  = io.cpu.addr(0)(offsetWidth - 1, bankOffsetWidth)
+  val bank_offset = io.cpu.addr(0)(bankOffsetWidth - 1, 2) // the low 2 bits of the PC are always 0, so this counts 32-bit instructions
 
   val tlb_fill = RegInit(false.B)
   // * fsm * //
   val s_idle :: s_uncached :: s_replace :: s_save :: Nil = Enum(4)
   val state                                              = RegInit(s_idle)
 
-  // * nway * nset * //
+  // * nway * nindex * //
   // * 128 bit for 4 inst * //
   // =========================================================
   // | valid | tag |  bank 0 | bank 1  |  bank 2 | bank 3 |
@@ -48,24 +50,24 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
   // | inst 0 | inst 1 | inst 2 | inst 3 |
   // |   32   |   32   |   32   |   32   |
   // =====================================
-  val instperbank = bankWidth / 4 // 每个bank存储的指令数
-  val valid       = RegInit(VecInit(Seq.fill(nset * nbank)(VecInit(Seq.fill(instperbank)(false.B)))))
+  require(instFetchNum == bytesPerBank / 4, "instFetchNum must equal to instperbank") // a bank must hold exactly one fetch group
+  val valid = RegInit(VecInit(Seq.fill(nindex)(VecInit(Seq.fill(nway)(false.B))))) // valid(index)(way): read as valid(vindex)(i) for i < nway and written as valid(vindex)(lru(vindex))
 
-  val data = Wire(Vec(nway, Vec(instperbank, UInt(XLEN.W))))
+  val data = Wire(Vec(nway, Vec(nbank, UInt(bitsPerBank.W)))) // data(way)(bank): one full bank RAM word (bitsPerBank bits), later sliced into INST_WID-bit instructions
   val tag  = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W))))
 
   // * should choose next addr * //
   val should_next_addr = (state === s_idle && !tlb_fill) || (state === s_save)
 
-  val data_raddr = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, bankOffsetWidth)
-  val data_wstrb = RegInit(VecInit(Seq.fill(nway)(VecInit(Seq.fill(instperbank)(0.U(4.W))))))
+  val data_raddr = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, offsetWidth)
+  val data_wstrb = RegInit(VecInit(Seq.fill(nway)(VecInit(Seq.fill(nbank)(false.B)))))
 
   val tag_raddr = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, offsetWidth)
   val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B)))
   val tag_wdata = RegInit(0.U(tagWidth.W))
 
   // * lru * //
-  val lru = RegInit(VecInit(Seq.fill(nset * nbank)(false.B)))
+  val lru = RegInit(VecInit(Seq.fill(nindex)(false.B))) // one replacement bit per index (only ever addressed by vindex); its value selects the way to refill
 
   // * itlb * //
   when(tlb_fill) { tlb_fill := false.B }
@@ -73,64 +75,85 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
   io.cpu.tlb.icache_is_save := (state === s_save)
 
   // * fence * //
+  // fence指令时清空cache，即将所有valid位置0
   when(io.cpu.fence && !io.cpu.icache_stall && io.cpu.cpu_ready) {
-    valid.map(_ := VecInit(Seq.fill(instperbank)(false.B)))
+    valid := 0.U.asTypeOf(valid)
   }
 
-  // * replace set * //
-  val rset = RegInit(0.U(6.W))
+  // * replace index * //
+  val rindex = RegInit(0.U(indexWidth.W))
 
-  // * virtual set * //
-  val vset = io.cpu.addr(0)(indexWidth + offsetWidth - 1, offsetWidth)
+  // * virtual index * //
+  val vindex = io.cpu.addr(0)(indexWidth + offsetWidth - 1, offsetWidth)
 
   // * cache hit * //
-  val tag_compare_valid   = VecInit(Seq.tabulate(nway)(i => tag(i) === io.cpu.tlb.tag && valid(vset)(i)))
+  val tag_compare_valid   = VecInit(Seq.tabulate(nway)(i => tag(i) === io.cpu.tlb.tag && valid(vindex)(i)))
   val cache_hit           = tag_compare_valid.contains(true.B)
   val cache_hit_available = cache_hit && io.cpu.tlb.translation_ok && !io.cpu.tlb.uncached
   val sel                 = tag_compare_valid(1)
 
-  val bank_offset = io.cpu.addr(0)(log2Ceil(instperbank) + 1, 2)
-  val inst = VecInit(
-    Seq.tabulate(instperbank)(i => Mux(i.U <= (3.U - bank_offset), data(sel)(i.U + bank_offset), 0.U))
+  // Split the selected bank word into instFetchNum slices of INST_WID bits each
+  val inst_in_bank = VecInit(
+    Seq.tabulate(instFetchNum)(i => data(sel)(bank_index)((i + 1) * INST_WID - 1, i * INST_WID))
+  )
-  val inst_valid = VecInit(Seq.tabulate(instperbank)(i => cache_hit_available && i.U <= (3.U - bank_offset)))
 
-  val saved = RegInit(VecInit(Seq.fill(instperbank)(0.U.asTypeOf(new Bundle {
+  // Realign the instructions of inst_in_bank by bank_offset so that slot 0 is the instruction addressed by io.cpu.addr(0).
+  // Slots that would fall past the end of the bank are not read from the next bank: they return 0 and are marked invalid.
+  // When the offset is 0, no realignment is needed.
+  // When the offset is 1, inst0 sent to the CPU is inst 1 of the bank and inst1 carries no data.
+  // |     bank        |
+  // | inst 0 | inst 1 |
+  // |   32   |   32   |
+  val inst = VecInit(
+    Seq.tabulate(instFetchNum)(i =>
+      Mux(
+        i.U <= ((instFetchNum - 1).U - bank_offset),
+        inst_in_bank(i.U + bank_offset),
+        0.U
+      )
+    )
+  )
+  val inst_valid = VecInit(
+    Seq.tabulate(instFetchNum)(i => cache_hit_available && i.U <= ((instFetchNum - 1).U - bank_offset))
+  )
+
+  val saved = RegInit(VecInit(Seq.fill(instFetchNum)(0.U.asTypeOf(new Bundle {
     val inst  = UInt(INST_WID.W)
     val valid = Bool()
   }))))
 
-  val axi_cnt = Counter(cacheConfig.burstSize)
+  val rlen  = nbank
+  val rsize = log2Ceil(bytesPerBank)
 
   // bank tag ram
-  for { i <- 0 until nway; j <- 0 until instperbank } {
-    val bank = Module(new SimpleDualPortRam(nset * nbank, INST_WID, byteAddressable = true))
-    bank.io.ren   := true.B
-    bank.io.raddr := data_raddr
-    data(i)(j)    := bank.io.rdata
+  for { i <- 0 until nway } {
+    // 每一个条目中有nbank个bank，每个bank存储instFetchNum个指令
+    val bank =
+      Seq.fill(nbank)(Module(new SimpleDualPortRam(depth = nindex, width = bitsPerBank, byteAddressable = false)))
+    for { j <- 0 until nbank } {
+      bank(j).io.ren   := true.B
+      bank(j).io.raddr := data_raddr
+      data(i)(j)       := bank(j).io.rdata
 
-    bank.io.wen   := data_wstrb(i)(j).orR
-    bank.io.waddr := Cat(rset, axi_cnt.value(log2Ceil(cacheConfig.burstSize) - 1, log2Ceil(instperbank)))
-    bank.io.wdata := Mux(
-      j.U === axi_cnt.value(log2Ceil(instperbank) - 1, 0),
-      Mux(axi_cnt.value(0) === 0.U, io.axi.r.bits.data(31, 0), io.axi.r.bits.data(63, 32)),
-      0.U
-    )
-    bank.io.wstrb := data_wstrb(i)(j)
+      bank(j).io.wen   := data_wstrb(i)(j)
+      bank(j).io.waddr := rindex
+      bank(j).io.wdata := io.axi.r.bits.data
+      bank(j).io.wstrb := data_wstrb(i)(j)
+    }
   }
 
-  for { i <- 0 until ninst } {
+  for { i <- 0 until instFetchNum } {
     io.cpu.inst_valid(i) := Mux(state === s_idle && !tlb_fill, inst_valid(i), saved(i).valid) && io.cpu.req
     io.cpu.inst(i)       := Mux(state === s_idle && !tlb_fill, inst(i), saved(i).inst)
   }
 
   for { i <- 0 until nway } {
-    val tag_bram = Module(new LUTRam(nset, tagWidth))
+    val tag_bram = Module(new LUTRam(nindex, tagWidth))
     tag_bram.io.raddr := tag_raddr
     tag(i)            := tag_bram.io.rdata
 
     tag_bram.io.wen   := tag_wstrb(i)
-    tag_bram.io.waddr := rset
+    tag_bram.io.waddr := rindex
     tag_bram.io.wdata := tag_wdata
   }
 
@@ -171,28 +194,29 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
         }.elsewhen(io.cpu.tlb.uncached) {
           state   := s_uncached
           ar.addr := io.cpu.tlb.pa
-          ar.len  := 0.U(log2Ceil((nbank * bankWidth) / 4).W)
-          ar.size := 2.U(bankOffsetWidth.W)
+          ar.len  := 0.U
+          ar.size := rsize.U
           arvalid := true.B
         }.elsewhen(!cache_hit) {
-          state   := s_replace
-          ar.addr := Cat(io.cpu.tlb.pa(31, 6), 0.U(6.W))
-          ar.len  := 15.U(log2Ceil((nbank * bankWidth) / 4).W)
-          ar.size := 2.U(bankOffsetWidth.W)
+          state := s_replace
+          // 取指时按bank块取指
+          ar.addr := Cat(io.cpu.tlb.pa(PADDR_WID - 1, offsetWidth), 0.U(offsetWidth.W))
+          ar.len  := (rlen - 1).U
+          ar.size := rsize.U
           arvalid := true.B
 
-          rset := vset
-          (0 until instperbank).foreach(i => data_wstrb(lru(vset))(i) := Mux(i.U === 0.U, 0xf.U, 0x0.U))
-          tag_wstrb(lru(vset))   := true.B
-          tag_wdata              := io.cpu.tlb.tag
-          valid(vset)(lru(vset)) := true.B
-          axi_cnt.reset()
+          rindex                        := vindex
+          data_wstrb(lru(vindex)).map(_ := false.B)
+          data_wstrb(lru(vindex))(0)    := true.B // 从第一个bank开始写入
+          tag_wstrb(lru(vindex))        := true.B
+          tag_wdata                     := io.cpu.tlb.tag
+          valid(vindex)(lru(vindex))    := true.B
         }.elsewhen(!io.cpu.icache_stall) {
-          lru(vset) := ~sel
+          lru(vindex) := ~sel
           when(!io.cpu.cpu_ready) {
             state := s_save
-            (1 until instperbank).foreach(i => saved(i).inst := data(sel)(i))
-            (0 until instperbank).foreach(i => saved(i).valid := inst_valid(i))
+            (1 until instFetchNum).foreach(i => saved(i).inst := inst(i)) // latch the offset-aligned fetch slot (matches inst_valid(i) below), not the raw word of bank i
+            (0 until instFetchNum).foreach(i => saved(i).valid := inst_valid(i))
           }
         }
       }
@@ -221,13 +245,12 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
       }.elsewhen(io.axi.r.fire) {
         // * burst transport * //
         when(!io.axi.r.bits.last) {
-          axi_cnt.inc()
-          data_wstrb(lru(vset))(0) := data_wstrb(lru(vset))(instperbank - 1)
-          (1 until instperbank).foreach(i => data_wstrb(lru(vset))(i) := data_wstrb(lru(vset))(i - 1))
+          // 左移写掩码，写入下一个bank
+          data_wstrb(lru(vindex)) := ((data_wstrb(lru(vindex)).asUInt << 1)(nbank - 1, 0)).asBools
         }.otherwise {
-          rready                := false.B
-          data_wstrb(lru(vset)) := 0.U.asTypeOf(Vec(instperbank, UInt(4.W)))
-          tag_wstrb(lru(vset))  := false.B
+          rready                        := false.B
+          data_wstrb(lru(vindex)).map(_ := false.B)
+          tag_wstrb(lru(vindex))        := false.B
         }
       }.elsewhen(!io.axi.r.ready) {
         state := s_idle
@@ -236,8 +259,19 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
     is(s_save) {
       when(io.cpu.cpu_ready && !io.cpu.icache_stall) {
         state := s_idle
-        (0 until instperbank).foreach(i => saved(i).valid := false.B)
+        (0 until instFetchNum).foreach(i => saved(i).valid := false.B)
       }
     }
   }
+
+  println("ICache: ")
+  println("nindex: " + nindex)
+  println("nbank: " + nbank)
+  println("bankOffsetWidth: " + bankOffsetWidth)
+  println("bytesPerBank: " + bytesPerBank)
+  println("tagWidth: " + tagWidth)
+  println("indexWidth: " + indexWidth)
+  println("offsetWidth: " + offsetWidth)
+  println("size: " + rsize)
+  println("len: " + rlen)
 }
diff --git a/chisel/playground/src/cache/memory/PortDefinitions.scala b/chisel/playground/src/cache/memory/PortDefinitions.scala
index b918f5e..0c587ab 100644
--- a/chisel/playground/src/cache/memory/PortDefinitions.scala
+++ b/chisel/playground/src/cache/memory/PortDefinitions.scala
@@ -17,7 +17,7 @@ class WriteOnlyPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) exten
 
 class WriteOnlyMaskPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
   val addr = Input(UInt(log2Ceil(cacheConfig.nindex * cacheConfig.nbank).W))
-  val en   = Input(UInt(cacheConfig.bankWidth.W))
+  val en   = Input(UInt(cacheConfig.bytesPerBank.W))
   val data = Input(gen)
 }
 
@@ -31,7 +31,7 @@ class ReadWritePort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) exten
 
 class MaskedReadWritePort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
   val addr      = Input(UInt(log2Ceil(cacheConfig.nindex * cacheConfig.nbank).W))
-  val writeMask = Input(UInt(cacheConfig.bankWidth.W))
+  val writeMask = Input(UInt(cacheConfig.bytesPerBank.W))
   val wdata     = Input(gen)
   val rdata     = Output(gen)
 }
diff --git a/chisel/playground/src/cache/memory/SimpleDualPortRam.scala b/chisel/playground/src/cache/memory/SimpleDualPortRam.scala
index 44ef47a..576d6e4 100644
--- a/chisel/playground/src/cache/memory/SimpleDualPortRam.scala
+++ b/chisel/playground/src/cache/memory/SimpleDualPortRam.scala
@@ -16,13 +16,18 @@ import cpu.CpuConfig
   * @param cpuCfg
   *   the implicit configuration for simulation and elaboration
   */
-class SimpleDualPortRam(depth: Int, width: Int, byteAddressable: Boolean)(implicit
-    val config: CpuConfig,
-) extends Module {
+class SimpleDualPortRam(
+  depth:           Int,
+  width:           Int,
+  byteAddressable: Boolean
+)(
+  implicit
+  val config: CpuConfig)
+    extends Module {
   require(isPow2(depth))
   require(
     width % 8 == 0 || !byteAddressable,
-    "if memory is byte addressable, then the adderss width must be a multiple of 8",
+    "if memory is byte addressable, then the adderss width must be a multiple of 8"
   )
   val waddridth = log2Ceil(depth)
 
@@ -40,11 +45,11 @@ class SimpleDualPortRam(depth: Int, width: Int, byteAddressable: Boolean)(implic
   if (config.build) {
     val memory = Module(
       new SimpleDualPortRamIP(
-        wdataidth = width,
+        wdataidth      = width,
         byteWriteWidth = if (byteAddressable) 8 else width,
-        numberOfLines = depth,
-        waddridth = waddridth,
-      ),
+        numberOfLines  = depth,
+        waddridth      = waddridth
+      )
     )
     memory.io.clka := clock
     memory.io.clkb := clock
@@ -62,12 +67,12 @@ class SimpleDualPortRam(depth: Int, width: Int, byteAddressable: Boolean)(implic
   } else {
     assert(
       io.wstrb.orR || !io.wen,
-      "when write port enable is high, write vector cannot be all 0",
+      "when write port enable is high, write vector cannot be all 0"
     )
     if (byteAddressable) {
       val bank = SyncReadMem(depth, Vec(width / 8, UInt(8.W)))
       when(io.ren) {
-        io.rdata := bank(io.raddr).asTypeOf(io.rdata)
+        io.rdata := bank.read(io.raddr).asTypeOf(UInt(width.W))
       }.otherwise {
         io.rdata := DontCare
       }
diff --git a/chisel/playground/src/defines/Const.scala b/chisel/playground/src/defines/Const.scala
index f28185f..b2b567f 100644
--- a/chisel/playground/src/defines/Const.scala
+++ b/chisel/playground/src/defines/Const.scala
@@ -20,7 +20,7 @@ trait Constants extends CoreParameter {
   val EXC_WID = 16
 
   // inst rom
-  val INST_WID      = XLEN
+  val INST_WID      = 32
   val INST_ADDR_WID = XLEN
 
   // data ram