docs(cache): 增加注释
This commit is contained in:
parent
c1cf6f8b7d
commit
e646ee4a4c
|
@ -8,6 +8,41 @@ import cpu.defines._
|
|||
import cpu.CpuConfig
|
||||
import cpu.defines.Const._
|
||||
|
||||
/*
|
||||
整个宽度为PADDR_WID的地址
|
||||
==========================================================
|
||||
| tag | index | offset |
|
||||
| | | bank index | bank offset |
|
||||
==========================================================
|
||||
|
||||
nway 路,每路 nindex 行
|
||||
==============================================================
|
||||
| valid | dirty | tag | bank 0 | bank 1 | ... | bank n |
|
||||
| 1 | 1 | | | | | |
|
||||
==============================================================
|
||||
| bank |
|
||||
| data 0 | data 1 | ... | data n |
|
||||
| XLEN | XLEN | ... | XLEN |
|
||||
=====================================
|
||||
|
||||
本 CPU 的实现如下:
|
||||
每个bank分为多个dataBlocks,每个dataBlocks的宽度为AXI_DATA_WID,这样能方便地和AXI总线进行交互
|
||||
RV64实现中AXI_DATA_WID为64,所以每个dataBlocks可以存储1个数据
|
||||
为了简化设计,目前一个bank中只有一个dataBlocks,即每个bank中只能存储一个数据
|
||||
这样的话dataBlocks可以被简化掉,直接用bank代替
|
||||
//TODO:解决AXI_DATA_WID小于XLEN的情况
|
||||
|
||||
==============================================================
|
||||
| valid | dirty | tag | bank 0 | bank 1 | ... | bank n |
|
||||
| 1 | 1 | | | | | |
|
||||
==============================================================
|
||||
| bank |
|
||||
| dataBlocks |
|
||||
| data 0 |
|
||||
| 64 |
|
||||
===================
|
||||
*/
|
||||
|
||||
class WriteBufferUnit extends Bundle {
|
||||
val data = UInt(XLEN.W)
|
||||
val addr = UInt(DATA_ADDR_WID.W)
|
||||
|
@ -19,33 +54,43 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
val nway: Int = cacheConfig.nway
|
||||
val nindex: Int = cacheConfig.nindex
|
||||
val nbank: Int = cacheConfig.nbank
|
||||
val bitsPerBank: Int = cacheConfig.bitsPerBank
|
||||
val tagWidth: Int = cacheConfig.tagWidth
|
||||
val burstSize: Int = 16
|
||||
val writeFifoDepth: Int = 4
|
||||
|
||||
// 每个bank中存AXI_DATA_WID位的数据
|
||||
// TODO:目前的实现只保证了AXI_DATA_WID为XLEN的情况下的正确性
|
||||
// Only the AXI_DATA_WID == XLEN configuration is supported for now (see the TODO above);
// reject any other configuration at construction time with a message that matches the check.
require(AXI_DATA_WID == XLEN, "AXI_DATA_WID should be equal to XLEN")
|
||||
|
||||
val io = IO(new Bundle {
|
||||
val cpu = Flipped(new Cache_DCache())
|
||||
val axi = new DCache_AXIInterface()
|
||||
})
|
||||
|
||||
val tlb_fill = RegInit(false.B)
|
||||
// * fsm * //
|
||||
val s_idle :: s_uncached :: s_writeback :: s_replace :: s_save :: Nil = Enum(5)
|
||||
val state = RegInit(s_idle)
|
||||
|
||||
val tlb_fill = RegInit(false.B)
|
||||
io.cpu.tlb.fill := tlb_fill
|
||||
io.cpu.tlb.dcache_is_idle := state === s_idle
|
||||
|
||||
// 每个bank中只有一个dataBlocks
|
||||
val dataBlocksPerBank = 1
|
||||
// AXI信号中size字段的取值(每拍字节数的log2);对于cached段,AXI_DATA_WID为64时size为3,即每拍8字节
|
||||
val cached_size = log2Ceil(AXI_DATA_WID / 8)
|
||||
val cached_len = (nbank * dataBlocksPerBank - 1)
|
||||
|
||||
// * valid dirty * //
|
||||
// 每行有一个有效位和一个脏位
|
||||
val valid = RegInit(VecInit(Seq.fill(nindex)(VecInit(Seq.fill(nway)(false.B)))))
|
||||
val dirty = RegInit(VecInit(Seq.fill(nindex)(VecInit(Seq.fill(nway)(false.B)))))
|
||||
val lru = RegInit(VecInit(Seq.fill(nindex)(0.U(1.W))))
|
||||
val lru = RegInit(VecInit(Seq.fill(nindex)(false.B))) // TODO:支持更多路数,目前只支持2路
|
||||
|
||||
val write_fifo = Module(new Queue(new WriteBufferUnit(), 4))
|
||||
val writeFifo = Module(new Queue(new WriteBufferUnit(), writeFifoDepth))
|
||||
|
||||
write_fifo.io.enq.valid := false.B
|
||||
write_fifo.io.enq.bits := 0.U.asTypeOf(new WriteBufferUnit())
|
||||
write_fifo.io.deq.ready := false.B
|
||||
writeFifo.io.enq.valid := false.B
|
||||
writeFifo.io.enq.bits := 0.U.asTypeOf(new WriteBufferUnit())
|
||||
writeFifo.io.deq.ready := false.B
|
||||
|
||||
val axi_cnt = Counter(burstSize)
|
||||
val read_ready_cnt = RegInit(0.U(4.W))
|
||||
|
@ -54,14 +99,14 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
// * victim cache * //
|
||||
val victim = RegInit(0.U.asTypeOf(new Bundle {
|
||||
val valid = Bool()
|
||||
val set = UInt(6.W)
|
||||
val index = UInt(6.W)
|
||||
val waddr = UInt(10.W)
|
||||
val wstrb = Vec(nway, UInt(AXI_STRB_WID.W))
|
||||
val working = Bool()
|
||||
val writeback = Bool()
|
||||
}))
|
||||
val victim_cnt = Counter(burstSize)
|
||||
val victim_addr = Cat(victim.set, victim_cnt.value)
|
||||
val victim_addr = Cat(victim.index, victim_cnt.value)
|
||||
|
||||
val fset = io.cpu.addr(11, 6)
|
||||
val fence = RegInit(0.U.asTypeOf(new Bundle {
|
||||
|
@ -77,7 +122,7 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
val data_waddr = Mux(victim.valid, victim.waddr, io.cpu.addr(11, 2))
|
||||
val data_wdata = Mux(state === s_replace, io.axi.r.bits.data, io.cpu.wdata)
|
||||
|
||||
val tag_raddr = Mux(victim.valid, victim.set, io.cpu.addr(11, 6))
|
||||
val tag_raddr = Mux(victim.valid, victim.index, io.cpu.addr(11, 6))
|
||||
val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B)))
|
||||
val tag_wdata = RegInit(0.U(tagWidth.W))
|
||||
|
||||
|
@ -88,13 +133,13 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
val cache_hit = tag_compare_valid.contains(true.B)
|
||||
|
||||
val mmio_read_stall = io.cpu.tlb.uncached && !io.cpu.wen.orR
|
||||
val mmio_write_stall = io.cpu.tlb.uncached && io.cpu.wen.orR && !write_fifo.io.enq.ready
|
||||
val mmio_write_stall = io.cpu.tlb.uncached && io.cpu.wen.orR && !writeFifo.io.enq.ready
|
||||
val cached_stall = !io.cpu.tlb.uncached && !cache_hit
|
||||
|
||||
val sel = tag_compare_valid(1)
|
||||
|
||||
// * physical set * //
|
||||
val pset = io.cpu.addr(11, 6)
|
||||
// * physical index * //
|
||||
val physical_index = io.cpu.addr(11, 6)
|
||||
|
||||
val dcache_stall = Mux(
|
||||
state === s_idle && !tlb_fill,
|
||||
|
@ -115,25 +160,25 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
|
||||
// bank tagv ram
|
||||
for { i <- 0 until nway } {
|
||||
val bank_ram = Module(new SimpleDualPortRam(nindex * nbank, bitsPerBank, byteAddressable = true))
|
||||
bank_ram.io.ren := true.B
|
||||
bank_ram.io.raddr := data_raddr
|
||||
data(i) := bank_ram.io.rdata
|
||||
val bank = Module(new SimpleDualPortRam(nindex * nbank, AXI_DATA_WID, byteAddressable = true))
|
||||
bank.io.ren := true.B
|
||||
bank.io.raddr := data_raddr
|
||||
data(i) := bank.io.rdata
|
||||
|
||||
bank_ram.io.wen := data_wstrb(i).orR
|
||||
bank_ram.io.waddr := data_waddr
|
||||
bank_ram.io.wdata := data_wdata
|
||||
bank_ram.io.wstrb := data_wstrb(i)
|
||||
bank.io.wen := data_wstrb(i).orR
|
||||
bank.io.waddr := data_waddr
|
||||
bank.io.wdata := data_wdata
|
||||
bank.io.wstrb := data_wstrb(i)
|
||||
|
||||
val tag_ram = Module(new LUTRam(nindex, tagWidth))
|
||||
tag_ram.io.raddr := tag_raddr
|
||||
tag(i) := tag_ram.io.rdata
|
||||
val tagRam = Module(new LUTRam(nindex, tagWidth))
|
||||
tagRam.io.raddr := tag_raddr
|
||||
tag(i) := tagRam.io.rdata
|
||||
|
||||
tag_ram.io.wen := tag_wstrb(i)
|
||||
tag_ram.io.waddr := victim.set
|
||||
tag_ram.io.wdata := tag_wdata
|
||||
tagRam.io.wen := tag_wstrb(i)
|
||||
tagRam.io.waddr := victim.index
|
||||
tagRam.io.wdata := tag_wdata
|
||||
|
||||
tag_compare_valid(i) := tag(i) === io.cpu.tlb.tag && valid(pset)(i) && io.cpu.tlb.translation_ok
|
||||
tag_compare_valid(i) := tag(i) === io.cpu.tlb.tag && valid(physical_index)(i) && io.cpu.tlb.translation_ok
|
||||
cache_data_forward(i) := Mux(
|
||||
last_waddr === io.cpu.addr(11, 2),
|
||||
((last_wstrb(i) & last_wdata) | (data(i) & (~last_wstrb(i)))),
|
||||
|
@ -188,13 +233,13 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
when(io.axi.b.fire) {
|
||||
write_buffer_axi_busy := false.B
|
||||
}
|
||||
}.elsewhen(write_fifo.io.deq.valid) {
|
||||
write_fifo.io.deq.ready := write_fifo.io.deq.valid
|
||||
when(write_fifo.io.deq.fire) {
|
||||
aw.addr := write_fifo.io.deq.bits.addr
|
||||
aw.size := write_fifo.io.deq.bits.size
|
||||
w.data := write_fifo.io.deq.bits.data
|
||||
w.strb := write_fifo.io.deq.bits.strb
|
||||
}.elsewhen(writeFifo.io.deq.valid) {
|
||||
writeFifo.io.deq.ready := writeFifo.io.deq.valid
|
||||
when(writeFifo.io.deq.fire) {
|
||||
aw.addr := writeFifo.io.deq.bits.addr
|
||||
aw.size := writeFifo.io.deq.bits.size
|
||||
w.data := writeFifo.io.deq.bits.data
|
||||
w.strb := writeFifo.io.deq.bits.strb
|
||||
}
|
||||
aw.len := 0.U
|
||||
awvalid := true.B
|
||||
|
@ -221,23 +266,23 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
}
|
||||
}.elsewhen(io.cpu.tlb.uncached) {
|
||||
when(io.cpu.wen.orR) {
|
||||
when(write_fifo.io.enq.ready && !current_mmio_write_saved) {
|
||||
write_fifo.io.enq.valid := true.B
|
||||
write_fifo.io.enq.bits.addr := Mux(
|
||||
when(writeFifo.io.enq.ready && !current_mmio_write_saved) {
|
||||
writeFifo.io.enq.valid := true.B
|
||||
writeFifo.io.enq.bits.addr := Mux(
|
||||
io.cpu.rlen === 2.U,
|
||||
Cat(io.cpu.tlb.pa(31, 2), 0.U(2.W)),
|
||||
io.cpu.tlb.pa
|
||||
)
|
||||
write_fifo.io.enq.bits.size := io.cpu.rlen
|
||||
write_fifo.io.enq.bits.strb := io.cpu.wstrb
|
||||
write_fifo.io.enq.bits.data := io.cpu.wdata
|
||||
writeFifo.io.enq.bits.size := io.cpu.rlen
|
||||
writeFifo.io.enq.bits.strb := io.cpu.wstrb
|
||||
writeFifo.io.enq.bits.data := io.cpu.wdata
|
||||
|
||||
current_mmio_write_saved := true.B
|
||||
}
|
||||
when(io.cpu.dcache_ready && io.cpu.cpu_ready) {
|
||||
current_mmio_write_saved := false.B
|
||||
}
|
||||
}.elsewhen(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) {
|
||||
}.elsewhen(!(writeFifo.io.deq.valid || write_buffer_axi_busy)) {
|
||||
ar.addr := Mux(io.cpu.rlen === 2.U, Cat(io.cpu.tlb.pa(31, 2), 0.U(2.W)), io.cpu.tlb.pa)
|
||||
ar.len := 0.U
|
||||
ar.size := io.cpu.rlen
|
||||
|
@ -249,19 +294,19 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
when(!cache_hit) {
|
||||
state := s_replace
|
||||
axi_cnt.reset()
|
||||
victim.set := pset
|
||||
victim.index := physical_index
|
||||
victim_cnt.reset()
|
||||
read_ready_set := pset
|
||||
read_ready_set := physical_index
|
||||
read_ready_cnt := 0.U
|
||||
victim.waddr := Cat(pset, 0.U(4.W))
|
||||
victim.waddr := Cat(physical_index, 0.U(4.W))
|
||||
victim.valid := true.B
|
||||
victim.writeback := dirty(pset)(lru(pset))
|
||||
victim.writeback := dirty(physical_index)(lru(physical_index))
|
||||
}.otherwise {
|
||||
when(io.cpu.dcache_ready) {
|
||||
// update lru and mark dirty
|
||||
lru(pset) := ~sel
|
||||
lru(physical_index) := ~sel
|
||||
when(io.cpu.wen.orR) {
|
||||
dirty(pset)(sel) := true.B
|
||||
dirty(physical_index)(sel) := true.B
|
||||
}
|
||||
when(!io.cpu.cpu_ready) {
|
||||
saved_rdata := cache_data_forward(sel)
|
||||
|
@ -272,10 +317,10 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
}
|
||||
}.elsewhen(io.cpu.fence) {
|
||||
when(dirty(fset).contains(true.B)) {
|
||||
when(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) {
|
||||
when(!(writeFifo.io.deq.valid || write_buffer_axi_busy)) {
|
||||
state := s_writeback
|
||||
axi_cnt.reset()
|
||||
victim.set := fset
|
||||
victim.index := fset
|
||||
victim_cnt.reset()
|
||||
read_ready_set := fset
|
||||
read_ready_cnt := 0.U
|
||||
|
@ -305,16 +350,16 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
when(victim_cnt.value =/= (burstSize - 1).U) {
|
||||
victim_cnt.inc()
|
||||
}
|
||||
read_ready_set := victim.set
|
||||
read_ready_set := victim.index
|
||||
read_ready_cnt := victim_cnt.value
|
||||
read_buffer(read_ready_cnt) := data(dirty(fset)(1))
|
||||
when(!aw_handshake) {
|
||||
aw.addr := Cat(tag(dirty(fset)(1)), fset, 0.U(6.W))
|
||||
aw.len := 15.U
|
||||
aw.size := "b011".U // 8 字节
|
||||
aw.len := cached_len.U
|
||||
aw.size := cached_size.U
|
||||
awvalid := true.B
|
||||
w.data := data(dirty(fset)(1))
|
||||
w.strb := 15.U
|
||||
w.strb := ~0.U(AXI_STRB_WID.W)
|
||||
w.last := false.B
|
||||
wvalid := true.B
|
||||
aw_handshake := true.B
|
||||
|
@ -351,23 +396,23 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
}
|
||||
}
|
||||
is(s_replace) {
|
||||
when(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) {
|
||||
when(!(writeFifo.io.deq.valid || write_buffer_axi_busy)) {
|
||||
when(victim.working) {
|
||||
when(victim.writeback) {
|
||||
when(victim_cnt.value =/= (burstSize - 1).U) {
|
||||
victim_cnt.inc()
|
||||
}
|
||||
read_ready_set := victim.set
|
||||
read_ready_set := victim.index
|
||||
read_ready_cnt := victim_cnt.value
|
||||
read_buffer(read_ready_cnt) := data(lru(pset))
|
||||
read_buffer(read_ready_cnt) := data(lru(physical_index))
|
||||
when(!aw_handshake) {
|
||||
aw.addr := Cat(tag(lru(pset)), pset, 0.U(6.W))
|
||||
aw.len := 15.U
|
||||
aw.size := "b011".U // 8 字节
|
||||
aw.addr := Cat(tag(lru(physical_index)), physical_index, 0.U(6.W))
|
||||
aw.len := cached_len.U
|
||||
aw.size := cached_size.U
|
||||
awvalid := true.B
|
||||
aw_handshake := true.B
|
||||
w.data := data(lru(pset))
|
||||
w.strb := 15.U
|
||||
w.data := data(lru(physical_index))
|
||||
w.strb := ~0.U(AXI_STRB_WID.W)
|
||||
w.last := false.B
|
||||
wvalid := true.B
|
||||
}
|
||||
|
@ -380,7 +425,7 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
}.otherwise {
|
||||
w.data := Mux(
|
||||
((axi_cnt.value + 1.U) === read_ready_cnt),
|
||||
data(lru(pset)),
|
||||
data(lru(physical_index)),
|
||||
read_buffer(axi_cnt.value + 1.U)
|
||||
)
|
||||
axi_cnt.inc()
|
||||
|
@ -390,29 +435,29 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
}
|
||||
}
|
||||
when(io.axi.b.valid) {
|
||||
dirty(pset)(lru(pset)) := false.B
|
||||
dirty(physical_index)(lru(physical_index)) := false.B
|
||||
victim.writeback := false.B
|
||||
}
|
||||
}
|
||||
when(!ar_handshake) {
|
||||
ar.addr := Cat(io.cpu.tlb.pa(31, 6), 0.U(6.W))
|
||||
ar.len := 15.U
|
||||
ar.size := "b011".U // 8 字节
|
||||
ar.len := cached_len.U
|
||||
ar.size := cached_size.U // 8 字节
|
||||
arvalid := true.B
|
||||
rready := true.B
|
||||
ar_handshake := true.B
|
||||
victim.wstrb(lru(pset)) := "hff".U
|
||||
tag_wstrb(lru(pset)) := true.B
|
||||
victim.wstrb(lru(physical_index)) := ~0.U(AXI_STRB_WID.W)
|
||||
tag_wstrb(lru(physical_index)) := true.B
|
||||
tag_wdata := io.cpu.tlb.pa(31, 12)
|
||||
}
|
||||
when(io.axi.ar.fire) {
|
||||
tag_wstrb(lru(pset)) := false.B
|
||||
tag_wstrb(lru(physical_index)) := false.B
|
||||
arvalid := false.B
|
||||
}
|
||||
when(io.axi.r.fire) {
|
||||
when(io.axi.r.bits.last) {
|
||||
rready := false.B
|
||||
victim.wstrb(lru(pset)) := 0.U
|
||||
victim.wstrb(lru(physical_index)) := 0.U
|
||||
}.otherwise {
|
||||
victim.waddr := victim.waddr + 1.U
|
||||
}
|
||||
|
@ -421,7 +466,7 @@ class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
(!victim.writeback || io.axi.b.valid) && ((ar_handshake && io.axi.r.valid && io.axi.r.bits.last) || (ar_handshake && !rready))
|
||||
) {
|
||||
victim.valid := false.B
|
||||
valid(pset)(lru(pset)) := true.B
|
||||
valid(physical_index)(lru(physical_index)) := true.B
|
||||
}
|
||||
when(!victim.valid) {
|
||||
victim.working := false.B
|
||||
|
|
|
@ -8,18 +8,55 @@ import cpu.defines._
|
|||
import cpu.CpuConfig
|
||||
import cpu.defines.Const._
|
||||
|
||||
/*
|
||||
整个宽度为PADDR_WID的地址
|
||||
==========================================================
|
||||
| tag | index | offset |
|
||||
| | | bank index | bank offset |
|
||||
==========================================================
|
||||
|
||||
nway 路,每路 nindex 行
|
||||
======================================================
|
||||
| valid | tag | bank 0 | bank 1 | ... | bank n |
|
||||
| 1 | | | | | |
|
||||
======================================================
|
||||
| bank |
|
||||
| inst 0 | inst 1 | ... | inst n |
|
||||
| 32 | 32 | ... | 32 |
|
||||
=====================================
|
||||
|
||||
本CPU的实现如下:
|
||||
每个bank分为多个instBlocks,每个instBlocks的宽度为AXI_DATA_WID,这样能方便地和AXI总线进行交互
|
||||
RV64实现中AXI_DATA_WID为64,所以每个instBlocks可以存储2条指令
|
||||
而instBlocks的个数会和instFetchNum相关
|
||||
- 当instFetchNum为4时,instBlocks的个数为2
|
||||
- 当instFetchNum为2时,instBlocks的个数为1
|
||||
读取数据时会将一个bank中的所有instBlocks读取出来,然后再将instBlocks中的数据按照偏移量重新排列
|
||||
这样的设计可以保证一个bank的指令数对应instFetchNum
|
||||
|
||||
======================================================
|
||||
| valid | tag | bank 0 | bank 1 | ... | bank n |
|
||||
| 1 | | | | | |
|
||||
======================================================
|
||||
| bank |
|
||||
| instBlocks | instBlocks |
|
||||
| inst 0 | inst 1 | inst 0 | inst 1 |
|
||||
| 32 | 32 | 32 | 32 |
|
||||
=====================================
|
||||
*/
|
||||
|
||||
class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Module {
|
||||
val nway: Int = cacheConfig.nway
|
||||
val nindex: Int = cacheConfig.nindex
|
||||
val nbank: Int = cacheConfig.nbank
|
||||
val instFetchNum: Int = config.instFetchNum
|
||||
val bankOffsetWidth: Int = cacheConfig.bankOffsetWidth
|
||||
val bankIndexWidth: Int = cacheConfig.offsetWidth - bankOffsetWidth
|
||||
val bytesPerBank: Int = cacheConfig.bytesPerBank
|
||||
val tagWidth: Int = cacheConfig.tagWidth
|
||||
val indexWidth: Int = cacheConfig.indexWidth
|
||||
val offsetWidth: Int = cacheConfig.offsetWidth
|
||||
val bitsPerBank: Int = cacheConfig.bitsPerBank
|
||||
val nway = cacheConfig.nway
|
||||
val nindex = cacheConfig.nindex
|
||||
val nbank = cacheConfig.nbank
|
||||
val instFetchNum = config.instFetchNum
|
||||
val bankOffsetWidth = cacheConfig.bankOffsetWidth
|
||||
val bankIndexWidth = cacheConfig.offsetWidth - bankOffsetWidth
|
||||
val bytesPerBank = cacheConfig.bytesPerBank
|
||||
val tagWidth = cacheConfig.tagWidth
|
||||
val indexWidth = cacheConfig.indexWidth
|
||||
val offsetWidth = cacheConfig.offsetWidth
|
||||
val bitsPerBank = cacheConfig.bitsPerBank
|
||||
val io = IO(new Bundle {
|
||||
val cpu = Flipped(new Cache_ICache())
|
||||
val axi = new ICache_AXIInterface()
|
||||
|
@ -31,12 +68,6 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
"bitsPerBank must be greater than AXI_DATA_WID"
|
||||
)
|
||||
|
||||
// 整个宽度为PADDR_WID的地址
|
||||
// ==========================================================
|
||||
// | tag | index | offset |
|
||||
// | | | bank index | bank offset |
|
||||
// ==========================================================
|
||||
|
||||
// 一个bank是bitsPerBank宽度,一个bank中有instFetchNum个指令
|
||||
// 每个bank中指令块的个数,一个指令块是AXI_DATA_WID宽度
|
||||
val instBlocksPerBank = bitsPerBank / AXI_DATA_WID
|
||||
|
@ -49,17 +80,6 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
val s_idle :: s_uncached :: s_replace :: s_save :: Nil = Enum(4)
|
||||
val state = RegInit(s_idle)
|
||||
|
||||
// * nway * nindex * //
|
||||
// * 128 bit for 4 inst * //
|
||||
// =========================================================
|
||||
// | valid | tag | bank 0 | bank 1 | bank 2 | bank 3 |
|
||||
// | 1 | 20 | 128 | 128 | 128 | 128 |
|
||||
// =========================================================
|
||||
// | bank |
|
||||
// | inst 0 | inst 1 | inst 2 | inst 3 |
|
||||
// | 32 | 32 | 32 | 32 |
|
||||
// =====================================
|
||||
|
||||
// nway 路,每路 nindex 行,每行 nbank 个 bank,每行的nbank共用一个valid
|
||||
val valid = RegInit(VecInit(Seq.fill(nway)(VecInit(Seq.fill(nindex)(false.B)))))
|
||||
|
||||
|
@ -97,7 +117,7 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
// * replace index * //
|
||||
val replace_index = RegInit(0.U(indexWidth.W))
|
||||
// 用于控制写入一行cache条目中的哪个bank, 一个bank可能有多次写入
|
||||
val repalce_wstrb = RegInit(
|
||||
val replace_wstrb = RegInit(
|
||||
VecInit(Seq.fill(nway)(VecInit(Seq.fill(nbank)(VecInit(Seq.fill(instBlocksPerBank)((false.B)))))))
|
||||
)
|
||||
|
||||
|
@ -107,9 +127,6 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
val cache_hit_available = cache_hit && io.cpu.tlb.translation_ok && !io.cpu.tlb.uncached
|
||||
val select_way = tag_compare_valid(1) // 1路命中时值为1,0路命中时值为0 //TODO:支持更多路数
|
||||
|
||||
// | bank |
|
||||
// | inst 0 | inst 1 |
|
||||
// | 32 | 32 |
|
||||
// 将一个 bank 中的指令分成 instFetchNum 份,每份 INST_WID bit
|
||||
val inst_in_bank = VecInit(
|
||||
Seq.tabulate(instFetchNum)(i => data(select_way)(bank_index).asUInt((i + 1) * INST_WID - 1, i * INST_WID))
|
||||
|
@ -138,9 +155,11 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
}))))
|
||||
|
||||
// 对于可缓存段访存时读取的数据宽度应该和AXI_DATA的宽度相同
|
||||
val cached_rsize = log2Ceil(AXI_DATA_WID / 8)
|
||||
val cached_size = log2Ceil(AXI_DATA_WID / 8)
|
||||
val cached_len = (nbank * instBlocksPerBank - 1)
|
||||
// 对于不可缓存段访存时读取的数据宽度应该和指令宽度相同
|
||||
val uncached_rsize = log2Ceil(INST_WID / 8)
|
||||
val uncached_size = log2Ceil(INST_WID / 8)
|
||||
val uncached_len = 0
|
||||
|
||||
// bank tag ram
|
||||
for { i <- 0 until nway } {
|
||||
|
@ -158,10 +177,10 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
bank(j)(k).io.raddr := data_rindex
|
||||
data(i)(j)(k) := bank(j)(k).io.rdata
|
||||
|
||||
bank(j)(k).io.wen := repalce_wstrb(i)(j)(k)
|
||||
bank(j)(k).io.wen := replace_wstrb(i)(j)(k)
|
||||
bank(j)(k).io.waddr := replace_index
|
||||
bank(j)(k).io.wdata := io.axi.r.bits.data
|
||||
bank(j)(k).io.wstrb := repalce_wstrb(i)(j)(k)
|
||||
bank(j)(k).io.wstrb := replace_wstrb(i)(j)(k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -198,7 +217,7 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
val addr_err = io.cpu.addr(should_next_addr)(XLEN - 1, PADDR_WID).orR
|
||||
|
||||
when(acc_err) { acc_err := false.B }
|
||||
io.cpu.acc_err := acc_err
|
||||
io.cpu.acc_err := acc_err //TODO:实现cached段中的访存错误
|
||||
|
||||
switch(state) {
|
||||
is(s_idle) {
|
||||
|
@ -219,20 +238,20 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
}.elsewhen(io.cpu.tlb.uncached) {
|
||||
state := s_uncached
|
||||
ar.addr := io.cpu.tlb.pa
|
||||
ar.len := 0.U
|
||||
ar.size := uncached_rsize.U
|
||||
ar.len := uncached_len.U
|
||||
ar.size := uncached_size.U
|
||||
arvalid := true.B
|
||||
}.elsewhen(!cache_hit) {
|
||||
state := s_replace
|
||||
// 取指时按bank块取指
|
||||
ar.addr := Cat(io.cpu.tlb.pa(PADDR_WID - 1, offsetWidth), 0.U(offsetWidth.W))
|
||||
ar.len := (nbank * instBlocksPerBank - 1).U
|
||||
ar.size := cached_rsize.U
|
||||
ar.len := cached_len.U
|
||||
ar.size := cached_size.U
|
||||
arvalid := true.B
|
||||
|
||||
replace_index := virtual_index
|
||||
repalce_wstrb(replace_way).map(_.map(_ := false.B))
|
||||
repalce_wstrb(replace_way)(0)(0) := true.B // 从第一个bank的第一个指令块开始写入
|
||||
replace_wstrb(replace_way).map(_.map(_ := false.B))
|
||||
replace_wstrb(replace_way)(0)(0) := true.B // 从第一个bank的第一个指令块开始写入
|
||||
tag_wstrb(replace_way) := true.B
|
||||
tag_wdata := io.cpu.tlb.tag
|
||||
valid(replace_way)(virtual_index) := true.B
|
||||
|
@ -271,11 +290,11 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
// * burst transport * //
|
||||
when(!io.axi.r.bits.last) {
|
||||
// 左移写掩码,写入下一个bank,或是同一个bank的下一个指令
|
||||
repalce_wstrb(replace_way) :=
|
||||
((repalce_wstrb(replace_way).asUInt << 1)).asTypeOf(repalce_wstrb(replace_way))
|
||||
replace_wstrb(replace_way) :=
|
||||
((replace_wstrb(replace_way).asUInt << 1)).asTypeOf(replace_wstrb(replace_way))
|
||||
}.otherwise {
|
||||
rready := false.B
|
||||
repalce_wstrb(replace_way).map(_.map(_ := false.B))
|
||||
replace_wstrb(replace_way).map(_.map(_ := false.B))
|
||||
tag_wstrb(replace_way) := false.B
|
||||
}
|
||||
}.elsewhen(!io.axi.r.ready) {
|
||||
|
@ -290,12 +309,12 @@ class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Modul
|
|||
}
|
||||
}
|
||||
|
||||
println("ICache: ")
|
||||
println("nindex: " + nindex)
|
||||
println("nbank: " + nbank)
|
||||
println("bankOffsetWidth: " + bankOffsetWidth)
|
||||
println("bytesPerBank: " + bytesPerBank)
|
||||
println("tagWidth: " + tagWidth)
|
||||
println("indexWidth: " + indexWidth)
|
||||
println("offsetWidth: " + offsetWidth)
|
||||
// println("ICache: ")
|
||||
// println("nindex: " + nindex)
|
||||
// println("nbank: " + nbank)
|
||||
// println("bankOffsetWidth: " + bankOffsetWidth)
|
||||
// println("bytesPerBank: " + bytesPerBank)
|
||||
// println("tagWidth: " + tagWidth)
|
||||
// println("indexWidth: " + indexWidth)
|
||||
// println("offsetWidth: " + offsetWidth)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue