diff --git a/chisel/playground/src/CpuConfig.scala b/chisel/playground/src/CpuConfig.scala
index 8a3eec3..6e63e66 100644
--- a/chisel/playground/src/CpuConfig.scala
+++ b/chisel/playground/src/CpuConfig.scala
@@ -16,7 +16,7 @@ case class CpuConfig(
   val decoderNum:      Int     = 2, // 同时访问寄存器的指令数
   val commitNum:       Int     = 2, // 同时提交的指令数
   val fuNum:           Int     = 2, // 功能单元数
-  val instFetchNum:    Int     = 2, // iCache取到的指令数量
+  val instFetchNum:    Int     = 2, // iCache取到的指令数量，目前为2和4时验证正确
   val instFifoDepth:   Int     = 8, // 指令缓存深度
   val mulClockNum:     Int     = 2, // 乘法器的时钟周期数
   val divClockNum:     Int     = 8, // 除法器的时钟周期数
diff --git a/chisel/playground/src/cache/Cache.scala b/chisel/playground/src/cache/Cache.scala
index 76e6d96..9409bf2 100644
--- a/chisel/playground/src/cache/Cache.scala
+++ b/chisel/playground/src/cache/Cache.scala
@@ -15,7 +15,7 @@ class Cache(implicit config: CpuConfig) extends Module {
   })
 
   implicit val iCacheConfig =
-    CacheConfig(nindex = 64, nbank = 4, bytesPerBank = (32 / 8) * config.instFetchNum) // 每个 bank 存 2 条 32 bit 指令
+    CacheConfig(nindex = 64, nbank = 4, bytesPerBank = (INST_WID / 8) * config.instFetchNum) // each bank holds config.instFetchNum instructions of INST_WID bits
   implicit val dCacheConfig =
     CacheConfig(nindex = 128, nbank = 8, bytesPerBank = XLEN / 8) // 每个 bank 存 1 条 XLEN bit 数据
 
diff --git a/chisel/playground/src/cache/ICache.scala b/chisel/playground/src/cache/ICache.scala
index da24e57..6816357 100644
--- a/chisel/playground/src/cache/ICache.scala
+++ b/chisel/playground/src/cache/ICache.scala
@@ -25,6 +25,8 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
     val axi = new ICache_AXIInterface()
   })
   require(isPow2(instFetchNum), "ninst must be power of 2")
+  require(instFetchNum == bytesPerBank / 4, "instFetchNum must equal to instperbank")
+  require(bitsPerBank >= AXI_DATA_WID && bitsPerBank % AXI_DATA_WID == 0, "bitsPerBank must be at least AXI_DATA_WID and a multiple of it") // bitsPerBank / AXI_DATA_WID must divide exactly
 
   // 整个宽度为PADDR_WID的地址
   // ==========================================================
@@ -32,6 +34,10 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
   // |                    |        | bank index | bank offset |
   // ==========================================================
 
+  // 一个bank是bitsPerBank宽度，一个bank中有instFetchNum个指令
+  // 每个bank中指令块的个数，一个指令块是AXI_DATA_WID宽度
+  val instBlocksPerBank = bitsPerBank / AXI_DATA_WID
+
   val bank_index  = io.cpu.addr(0)(offsetWidth - 1, bankOffsetWidth)
   val bank_offset = io.cpu.addr(0)(bankOffsetWidth - 1, 2) // PC低2位必定是0
 
@@ -44,30 +50,29 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
   // * 128 bit for 4 inst * //
   // =========================================================
   // | valid | tag |  bank 0 | bank 1  |  bank 2 | bank 3 |
-  // | 1     | 20  |   128   |   128   |   128   |  128   |
+  // | 1111  | 20  |   128   |   128   |   128   |  128   |
   // =========================================================
   // |                bank               |
   // | inst 0 | inst 1 | inst 2 | inst 3 |
   // |   32   |   32   |   32   |   32   |
   // =====================================
-  require(instFetchNum == bytesPerBank / 4, "instFetchNum must equal to instperbank")
-  val valid = RegInit(VecInit(Seq.fill(nindex)(VecInit(Seq.fill(nbank)(false.B)))))
 
-  val data = Wire(Vec(nway, Vec(nbank, UInt(XLEN.W))))
-  val tag  = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W))))
+  val valid = RegInit(VecInit(Seq.fill(nindex)(VecInit(Seq.fill(nbank)(false.B)))))
 
   // * should choose next addr * //
   val should_next_addr = (state === s_idle && !tlb_fill) || (state === s_save)
 
-  val data_raddr = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, offsetWidth)
-  val data_wstrb = RegInit(VecInit(Seq.fill(nway)(VecInit(Seq.fill(nbank)(false.B)))))
+  // 读取一个cache条目中的所有bank行
+  val data        = Wire(Vec(nway, Vec(nbank, Vec(instBlocksPerBank, UInt(AXI_DATA_WID.W)))))
+  val data_rindex = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, offsetWidth)
 
+  val tag       = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W))))
   val tag_raddr = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, offsetWidth)
   val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B)))
   val tag_wdata = RegInit(0.U(tagWidth.W))
 
   // * lru * //
-  val lru = RegInit(VecInit(Seq.fill(nindex * nbank)(false.B)))
+  val lru = RegInit(VecInit(Seq.fill(nindex * nbank)(false.B))) // TODO: verify lru correctness (sized nindex * nbank, but indexed by virtual_index and used as the way select)
 
   // * itlb * //
   when(tlb_fill) { tlb_fill := false.B }
@@ -75,35 +80,39 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
   io.cpu.tlb.icache_is_save := (state === s_save)
 
   // * fence * //
-  // fence指令时清空cache，即将所有valid位置0
+  // fence指令时清空cache，等同于将所有valid位置0
   when(io.cpu.fence && !io.cpu.icache_stall && io.cpu.cpu_ready) {
     valid := 0.U.asTypeOf(valid)
   }
 
   // * replace index * //
-  val rindex = RegInit(0.U(indexWidth.W))
+  val replace_index = RegInit(0.U(indexWidth.W))
+  // 用于控制写入一行cache条目中的哪个bank, 一个bank可能有多次写入
+  val repalce_wstrb = RegInit(
+    VecInit(Seq.fill(nway)(VecInit(Seq.fill(nbank)(VecInit(Seq.fill(instBlocksPerBank)((false.B)))))))
+  )
 
   // * virtual index * //
-  val vindex = io.cpu.addr(0)(indexWidth + offsetWidth - 1, offsetWidth)
+  val virtual_index = io.cpu.addr(0)(indexWidth + offsetWidth - 1, offsetWidth)
 
   // * cache hit * //
-  val tag_compare_valid   = VecInit(Seq.tabulate(nway)(i => tag(i) === io.cpu.tlb.tag && valid(vindex)(i)))
+  val tag_compare_valid   = VecInit(Seq.tabulate(nway)(i => tag(i) === io.cpu.tlb.tag && valid(virtual_index)(i)))
   val cache_hit           = tag_compare_valid.contains(true.B)
   val cache_hit_available = cache_hit && io.cpu.tlb.translation_ok && !io.cpu.tlb.uncached
   val sel                 = tag_compare_valid(1)
 
+  // |     bank        |
+  // | inst 0 | inst 1 |
+  // |   32   |   32   |
   // 将一个 bank 中的指令分成 instFetchNum 份，每份 INST_WID bit
   val inst_in_bank = VecInit(
-    Seq.tabulate(instFetchNum)(i => data(sel)(bank_index)((i + 1) * INST_WID - 1, i * INST_WID))
+    Seq.tabulate(instFetchNum)(i => data(sel)(bank_index).asUInt((i + 1) * INST_WID - 1, i * INST_WID))
   )
 
   // 将 inst_in_bank 中的指令按照 bank_offset 位偏移量重新排列
   // 处理偏移导致的跨 bank 读取
   // 当offset为0时，不需要重新排列
-  // 当offset为1时，此时发送到cpu的inst0应该是inst1，inst1应该无数据
-  // |     bank        |
-  // | inst 0 | inst 1 |
-  // |   32   |   32   |
+  // 当offset为1时，此时发送到cpu的inst0应该是inst1，inst1应该无数据，并设置对应的valid
   val inst = VecInit(
     Seq.tabulate(instFetchNum)(i =>
       Mux(
@@ -122,23 +131,31 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
     val valid = Bool()
   }))))
 
-  val rlen  = nbank
-  val rsize = log2Ceil(bytesPerBank)
+  // 对于可缓存段访存时读取的数据宽度应该和AXI_DATA的宽度相同
+  val cached_rsize = log2Ceil(AXI_DATA_WID / 8)
+  // 对于不可缓存段访存时读取的数据宽度应该和指令宽度相同
+  val uncached_rsize = log2Ceil(INST_WID / 8)
 
   // bank tag ram
   for { i <- 0 until nway } {
     // 每一个条目中有nbank个bank，每个bank存储instFetchNum个指令
     val bank =
-      Seq.fill(nbank)(Module(new SimpleDualPortRam(depth = nindex, width = bitsPerBank, byteAddressable = false)))
+      Seq.fill(nbank)(
+        Seq.fill(instBlocksPerBank)(
+          Module(new SimpleDualPortRam(depth = nindex, width = AXI_DATA_WID, byteAddressable = false))
+        )
+      )
     for { j <- 0 until nbank } {
-      bank(j).io.ren   := true.B
-      bank(j).io.raddr := data_raddr
-      data(i)(j)       := bank(j).io.rdata
+      for { k <- 0 until instBlocksPerBank } {
+        bank(j)(k).io.ren   := true.B
+        bank(j)(k).io.raddr := data_rindex
+        data(i)(j)(k)       := bank(j)(k).io.rdata
 
-      bank(j).io.wen   := data_wstrb(i)(j)
-      bank(j).io.waddr := rindex
-      bank(j).io.wdata := io.axi.r.bits.data
-      bank(j).io.wstrb := data_wstrb(i)(j)
+        bank(j)(k).io.wen   := repalce_wstrb(i)(j)(k)
+        bank(j)(k).io.waddr := replace_index
+        bank(j)(k).io.wdata := io.axi.r.bits.data
+        bank(j)(k).io.wstrb := repalce_wstrb(i)(j)(k)
+      }
     }
   }
 
@@ -153,7 +170,7 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
     tag(i)            := tag_bram.io.rdata
 
     tag_bram.io.wen   := tag_wstrb(i)
-    tag_bram.io.waddr := rindex
+    tag_bram.io.waddr := replace_index
     tag_bram.io.wdata := tag_wdata
   }
 
@@ -195,27 +212,27 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
           state   := s_uncached
           ar.addr := io.cpu.tlb.pa
           ar.len  := 0.U
-          ar.size := rsize.U
+          ar.size := uncached_rsize.U
           arvalid := true.B
         }.elsewhen(!cache_hit) {
           state := s_replace
           // 取指时按bank块取指
           ar.addr := Cat(io.cpu.tlb.pa(PADDR_WID - 1, offsetWidth), 0.U(offsetWidth.W))
-          ar.len  := (rlen - 1).U
-          ar.size := rsize.U
+          ar.len  := (nbank * instBlocksPerBank - 1).U
+          ar.size := cached_rsize.U
           arvalid := true.B
 
-          rindex                        := vindex
-          data_wstrb(lru(vindex)).map(_ := false.B)
-          data_wstrb(lru(vindex))(0)    := true.B // 从第一个bank开始写入
-          tag_wstrb(lru(vindex))        := true.B
-          tag_wdata                     := io.cpu.tlb.tag
-          valid(vindex)(lru(vindex))    := true.B
+          replace_index := virtual_index
+          repalce_wstrb(lru(virtual_index)).map(_.map(_ := false.B))
+          repalce_wstrb(lru(virtual_index))(0)(0)  := true.B // 从第一个bank的第一个指令块开始写入
+          tag_wstrb(lru(virtual_index))            := true.B
+          tag_wdata                                := io.cpu.tlb.tag
+          valid(virtual_index)(lru(virtual_index)) := true.B
         }.elsewhen(!io.cpu.icache_stall) {
-          lru(vindex) := ~sel
+          lru(virtual_index) := ~sel
           when(!io.cpu.cpu_ready) {
             state := s_save
-            (1 until instFetchNum).foreach(i => saved(i).inst := data(sel)(i))
+            (1 until instFetchNum).foreach(i => saved(i).inst := inst(i))
             (0 until instFetchNum).foreach(i => saved(i).valid := inst_valid(i))
           }
         }
@@ -245,12 +262,13 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
       }.elsewhen(io.axi.r.fire) {
         // * burst transport * //
         when(!io.axi.r.bits.last) {
-          // 左移写掩码，写入下一个bank
-          data_wstrb(lru(vindex)) := ((data_wstrb(lru(vindex)).asUInt << 1)(nbank - 1, 0)).asBools
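+          // Shift the write mask left by one: the next beat is written to the next AXI_DATA_WID-wide block of the same bank, or to the first block of the next bank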
+          repalce_wstrb(lru(virtual_index)) :=
+            ((repalce_wstrb(lru(virtual_index)).asUInt << 1)).asTypeOf(repalce_wstrb(lru(virtual_index)))
         }.otherwise {
-          rready                        := false.B
-          data_wstrb(lru(vindex)).map(_ := false.B)
-          tag_wstrb(lru(vindex))        := false.B
+          rready := false.B
+          repalce_wstrb(lru(virtual_index)).map(_.map(_ := false.B))
+          tag_wstrb(lru(virtual_index)) := false.B
         }
       }.elsewhen(!io.axi.r.ready) {
         state := s_idle
@@ -272,6 +290,4 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
   println("tagWidth: " + tagWidth)
   println("indexWidth: " + indexWidth)
   println("offsetWidth: " + offsetWidth)
-  println("size: " + rsize)
-  println("len: " + rlen)
 }