feat: 增加dcache

This commit is contained in:
Liphen 2023-12-22 14:18:32 +08:00
parent c7911cb471
commit 76c0f446da
7 changed files with 446 additions and 91 deletions

View File

@ -12,10 +12,7 @@ import pipeline.execute._
import pipeline.memory._
import pipeline.writeback._
import ctrl._
import mmu._
import chisel3.util.experimental.decode.decoder
import cpu.pipeline.fetch.InstFifo
import cache.mmu.ITlbL1
import cache.mmu._
class Core(implicit val config: CpuConfig) extends Module {
val io = IO(new Bundle {
@ -38,10 +35,14 @@ class Core(implicit val config: CpuConfig) extends Module {
val memoryUnit = Module(new MemoryUnit()).io
val writeBackStage = Module(new WriteBackStage()).io
val writeBackUnit = Module(new WriteBackUnit()).io
val tlbL1I = Module(new ITlbL1()).io
val itlbL1 = Module(new ITlbL1()).io
val dtlbL1 = Module(new DTlbL1()).io
tlbL1I.addr := fetchUnit.iCache.pc
tlbL1I.cache <> io.inst.tlb
itlbL1.addr := fetchUnit.iCache.pc
itlbL1.cache <> io.inst.tlb
dtlbL1.addr := memoryUnit.dataMemory.out.addr
dtlbL1.cache <> io.data.tlb
ctrl.decoderUnit <> decoderUnit.ctrl
ctrl.executeUnit <> executeUnit.ctrl
@ -113,7 +114,7 @@ class Core(implicit val config: CpuConfig) extends Module {
memoryUnit.dataMemory.in.acc_err := io.data.acc_err
memoryUnit.dataMemory.in.ready := io.data.dcache_ready
io.data.en := memoryUnit.dataMemory.out.en
io.data.size := memoryUnit.dataMemory.out.rlen
io.data.rlen := memoryUnit.dataMemory.out.rlen
io.data.wen := memoryUnit.dataMemory.out.wen
io.data.wdata := memoryUnit.dataMemory.out.wdata
io.data.addr := memoryUnit.dataMemory.out.addr

View File

@ -14,9 +14,10 @@ class Cache(implicit config: CpuConfig) extends Module {
})
implicit val iCacheConfig = CacheConfig(nset = 64, nbank = 4, bankWidth = 16)
implicit val dCacheConfig = CacheConfig(nset = 128, bankWidth = 4)
val icache = Module(new ICache(iCacheConfig))
val dcache = Module(new DCache())
val dcache = Module(new DCache(dCacheConfig))
val axi_interface = Module(new CacheAXIInterface())
icache.io.axi <> axi_interface.io.icache

View File

@ -1,122 +1,443 @@
// * The cache design borrows from cdim by cqu (nscscc2021) * //
package cache
import chisel3._
import chisel3.util._
import memory._
import cpu.CacheConfig
import cpu.defines._
import cpu.CpuConfig
import cpu.defines.Const._
class DCache(implicit config: CpuConfig) extends Module {
/** One buffered store request, as queued by the DCache write FIFO.
  *
  * The declaration order of the fields is kept as-is because it determines the
  * bundle's packed bit layout.
  */
class WriteBufferUnit extends Bundle {
  // Store payload.
  val data: UInt = UInt(DATA_WID.W)
  // Target address of the store.
  val addr: UInt = UInt(DATA_ADDR_WID.W)
  // Byte strobe, one bit per byte lane (4 lanes).
  val strb: UInt = UInt(4.W)
  // Two-bit access size code.
  val size: UInt = UInt(2.W)
}
// DCache: data cache module placed between the CPU-side `Cache_DCache` port (io.cpu) and the
// `DCache_AXIInterface` port (io.axi). Geometry comes from `cacheConfig`.
// NOTE(review): this span appears to interleave the removed and the added lines of a diff without
// +/- markers (two `s_idle` enums, both `status` and `state`, several vals declared twice) —
// confirm against the real file before relying on any single line.
class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Module {
// Cache geometry, copied from the configuration object.
val nway: Int = cacheConfig.nway
val nset: Int = cacheConfig.nset
val nbank: Int = cacheConfig.nbank
val bankWidthBits: Int = cacheConfig.bankWidthBits
val tagWidth: Int = cacheConfig.tagWidth
val burstSize: Int = cacheConfig.burstSize
val io = IO(new Bundle {
val cpu = Flipped(new Cache_DCache())
val axi = new DCache_AXIInterface()
})
// Set in s_idle when translation is not ok and tlb1_ok is low; cleared on the next enabled idle cycle.
val tlb_fill = RegInit(false.B)
// * fsm * //
val s_idle :: s_uncached :: s_writeback :: Nil = Enum(3)
val status = RegInit(s_idle)
val s_idle :: s_uncached :: s_writeback :: s_replace :: Nil = Enum(4)
val state = RegInit(s_idle)
// High when any of address bits 63..32 is set.
val addr_err = io.cpu.addr(63, 32).orR
io.cpu.tlb.fill := tlb_fill
io.cpu.tlb.dcache_is_idle := state === s_idle
// default
// Write-address channel driven from registers; single-beat (len = 0) requests.
val awvalid = RegInit(false.B)
val awaddr = RegInit(0.U(AXI_ADDR_WID.W))
val awsize = RegInit(0.U(AXI_SIZE_WID.W))
io.axi.aw.bits.id := 1.U
io.axi.aw.bits.addr := awaddr
io.axi.aw.bits.len := 0.U
io.axi.aw.bits.size := awsize
io.axi.aw.bits.burst := BURST_INCR.U
io.axi.aw.valid := awvalid
io.axi.aw.bits.prot := 0.U
io.axi.aw.bits.lock := 0.U
io.axi.aw.bits.cache := 0.U
// * valid dirty * //
// One valid bit and one dirty bit per (set, way); one LRU bit per set.
val valid = RegInit(VecInit(Seq.fill(nset)(VecInit(Seq.fill(nway)(false.B)))))
val dirty = RegInit(VecInit(Seq.fill(nset)(VecInit(Seq.fill(nway)(false.B)))))
val lru = RegInit(VecInit(Seq.fill(nset)(0.U(1.W))))
// Write-data channel driven from registers.
val wvalid = RegInit(false.B)
val wdata = RegInit(0.U(AXI_DATA_WID.W))
val wstrb = RegInit(0.U(AXI_STRB_WID.W))
io.axi.w.bits.id := 1.U
io.axi.w.bits.data := wdata
io.axi.w.bits.strb := wstrb
io.axi.w.bits.last := 1.U
io.axi.w.valid := wvalid
// Four-entry queue of pending stores; the enq/deq defaults below are overridden further down.
val write_fifo = Module(new Queue(new WriteBufferUnit(), 4))
io.axi.b.ready := 1.U
write_fifo.io.enq.valid := false.B
write_fifo.io.enq.bits := 0.U.asTypeOf(new WriteBufferUnit())
write_fifo.io.deq.ready := false.B
// Read-address channel driven from registers; single-beat (len = 0) requests.
val araddr = RegInit(0.U(AXI_ADDR_WID.W))
val arsize = RegInit(0.U(AXI_SIZE_WID.W))
val arvalid = RegInit(false.B)
io.axi.ar.bits.id := 1.U
io.axi.ar.bits.addr := araddr
io.axi.ar.bits.len := 0.U
io.axi.ar.bits.size := arsize
io.axi.ar.bits.burst := BURST_INCR.U
io.axi.ar.valid := arvalid
io.axi.ar.bits.prot := 0.U
io.axi.ar.bits.cache := 0.U
io.axi.ar.bits.lock := 0.U
// Counts write beats sent during a line write-back.
val axi_cnt = Counter(burstSize)
// Set/word index of the line word most recently requested from the data RAM (see read_buffer).
val read_ready_cnt = RegInit(0.U(4.W))
val read_ready_set = RegInit(0.U(6.W))
val rready = RegInit(false.B)
io.axi.r.ready := rready
// * victim cache * //
// Bookkeeping for the line currently being written back and/or refilled.
val victim = RegInit(0.U.asTypeOf(new Bundle {
val valid = Bool()
val set = UInt(6.W)
val waddr = UInt(10.W)
val wstrb = Vec(nway, UInt(4.W))
val working = Bool()
val writeback = Bool()
}))
val victim_cnt = Counter(burstSize)
// Data-RAM read address while a victim line is being walked: {set, word counter}.
val victim_addr = Cat(victim.set, victim_cnt.value)
// Set index used by the fence path (address bits 11..6).
val fset = io.cpu.addr(11, 6)
val fence = RegInit(0.U.asTypeOf(new Bundle {
val working = Bool()
}))
// Holds up to 16 words read out of the data RAM while they wait to be sent on the W channel.
val read_buffer = RegInit(VecInit(Seq.fill(16)(0.U(DATA_WID.W))))
// Set once the AR / AW request of the current burst has been issued.
val ar_handshake = RegInit(false.B)
val aw_handshake = RegInit(false.B)
// While victim.valid is set the RAM ports are steered by the victim logic instead of the CPU address.
val data_raddr = Mux(victim.valid, victim_addr, io.cpu.addr(11, 2))
val data_wstrb = Wire(Vec(nway, UInt(4.W)))
val data_waddr = Mux(victim.valid, victim.waddr, io.cpu.addr(11, 2))
// In s_replace the data RAM is written with incoming AXI read data, otherwise with CPU store data.
val data_wdata = Mux(state === s_replace, io.axi.r.bits.data, io.cpu.wdata)
val tag_raddr = Mux(victim.valid, victim.set, io.cpu.addr(11, 6))
val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B)))
val tag_wdata = RegInit(0.U(tagWidth.W))
val data = Wire(Vec(nway, UInt(DATA_WID.W)))
val tag = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W))))
val tag_compare_valid = Wire(Vec(nway, Bool()))
// Hit when any way reports a valid tag match.
val cache_hit = tag_compare_valid.contains(true.B)
// Stall terms used by dcache_stall: an uncached access without write enable always counts as a
// stall; an uncached store stalls only when write_fifo cannot accept it; a cached access stalls on a miss.
val mmio_read_stall = io.cpu.tlb.uncached && !io.cpu.wen.orR
val mmio_write_stall = io.cpu.tlb.uncached && io.cpu.wen.orR && !write_fifo.io.enq.ready
val cached_stall = !io.cpu.tlb.uncached && !cache_hit
// Way select: true when way 1 matched.
val sel = tag_compare_valid(1)
// * physical set * //
val pset = io.cpu.addr(11, 6)
// Outside s_idle, or while tlb_fill is set, the cache always stalls. In idle it stalls an enabled
// request on any of the stall terms or a failed translation, and otherwise stalls while fence is high.
val dcache_stall = Mux(
state === s_idle && !tlb_fill,
Mux(io.cpu.en, (cached_stall || mmio_read_stall || mmio_write_stall || !io.cpu.tlb.translation_ok), io.cpu.fence),
true.B
)
io.cpu.dcache_ready := !dcache_stall
val saved_rdata = RegInit(0.U(DATA_WID.W))
val acc_err = RegInit(false.B)
val mmio_read_stall = !io.cpu.wen.orR
val mmio_write_stall = io.cpu.wen.orR && !io.axi.w.ready
val cached_stall = false.B
io.cpu.dcache_ready := status === s_idle
io.cpu.rdata := saved_rdata
io.cpu.acc_err := acc_err
// forward last stored data in data bram
// The previous cycle's write address, bit mask and data are kept so a read of the same word
// address can be patched with them (see cache_data_forward in the per-way loop).
val last_waddr = RegNext(data_waddr)
val last_wstrb = RegInit(VecInit(Seq.fill(nway)(0.U(DATA_WID.W))))
val last_wdata = RegNext(data_wdata)
val cache_data_forward = Wire(Vec(nway, UInt(DATA_WID.W)))
switch(status) {
io.cpu.rdata := cache_data_forward(sel)
// bank tagv ram
// Per way: one data RAM with byte write strobes and one tag RAM.
for { i <- 0 until nway } {
val bank_ram = Module(new SimpleDualPortRam(nset * nbank, bankWidthBits, byteAddressable = true))
bank_ram.io.ren := true.B
bank_ram.io.raddr := data_raddr
data(i) := bank_ram.io.rdata
bank_ram.io.wen := data_wstrb(i).orR
bank_ram.io.waddr := data_waddr
bank_ram.io.wdata := data_wdata
bank_ram.io.wstrb := data_wstrb(i)
val tag_ram = Module(new LUTRam(nset, tagWidth))
tag_ram.io.raddr := tag_raddr
tag(i) := tag_ram.io.rdata
tag_ram.io.wen := tag_wstrb(i)
tag_ram.io.waddr := victim.set
tag_ram.io.wdata := tag_wdata
// A way hits when its stored tag equals the TLB tag, the line is valid and translation succeeded.
tag_compare_valid(i) := tag(i) === io.cpu.tlb.tag && valid(pset)(i) && io.cpu.tlb.translation_ok
// When the last write went to the word now being read, merge the written bits over the RAM output.
cache_data_forward(i) := Mux(
last_waddr === io.cpu.addr(11, 2),
((last_wstrb(i) & last_wdata) | (data(i) & (~last_wstrb(i)))),
data(i)
)
// CPU store strobes are used only for a cached store hit in idle; otherwise the victim strobes apply.
data_wstrb(i) := Mux(
tag_compare_valid(i) && io.cpu.en && io.cpu.wen.orR && !io.cpu.tlb.uncached && state === s_idle && !tlb_fill,
io.cpu.wen,
victim.wstrb(i)
)
// Expand the 4-bit byte strobe into a 32-bit bit mask (each strobe bit repeated 8 times).
last_wstrb(i) := Cat(
Fill(8, data_wstrb(i)(3)),
Fill(8, data_wstrb(i)(2)),
Fill(8, data_wstrb(i)(1)),
Fill(8, data_wstrb(i)(0))
)
}
// True from the moment a write_fifo entry is issued on AXI until its B response fires.
val write_buffer_axi_busy = RegInit(false.B)
// AXI channel payloads and valid/ready flags, all held in registers.
val ar = RegInit(0.U.asTypeOf(new AR()))
val arvalid = RegInit(false.B)
io.axi.ar.bits <> ar
io.axi.ar.valid := arvalid
val rready = RegInit(false.B)
io.axi.r.ready := rready
val aw = RegInit(0.U.asTypeOf(new AW()))
val awvalid = RegInit(false.B)
io.axi.aw.bits <> aw
io.axi.aw.valid := awvalid
val w = RegInit(0.U.asTypeOf(new W()))
val wvalid = RegInit(false.B)
io.axi.w.bits <> w
io.axi.w.valid := wvalid
io.axi.b.ready := true.B
val acc_err = RegInit(false.B)
// High when any address bit at or above VADDR_WID is set.
val addr_err = io.cpu.addr(XLEN - 1, VADDR_WID).orR
// acc_err is cleared on the cycle after it was set.
when(acc_err) {
acc_err := false.B
}
io.cpu.acc_err := acc_err
// Remembers that the current uncached store has already been pushed into write_fifo.
val current_mmio_write_saved = RegInit(false.B)
// write buffer
// Drains write_fifo one entry at a time: dequeue, raise AW and W, then wait for B.
when(write_buffer_axi_busy) { // To implement SC memory ordering, when store buffer busy, axi is unusable.
when(io.axi.aw.fire) {
awvalid := false.B
}
when(io.axi.w.fire) {
wvalid := false.B
w.last := false.B
}
when(io.axi.b.fire) {
write_buffer_axi_busy := false.B
}
}.elsewhen(write_fifo.io.deq.valid) {
write_fifo.io.deq.ready := write_fifo.io.deq.valid
when(write_fifo.io.deq.fire) {
aw.addr := write_fifo.io.deq.bits.addr
aw.size := Cat(0.U(1.W), write_fifo.io.deq.bits.size)
w.data := write_fifo.io.deq.bits.data
w.strb := write_fifo.io.deq.bits.strb
}
aw.len := 0.U
awvalid := true.B
w.last := true.B
wvalid := true.B
write_buffer_axi_busy := true.B
}
// Main state machine.
switch(state) {
// Idle: handle an enabled request, or a fence when no request is enabled.
is(s_idle) {
acc_err := false.B
when(io.cpu.en) {
when(tlb_fill) {
tlb_fill := false.B
when(!io.cpu.tlb.hit) {
state := s_idle
}
}.elsewhen(io.cpu.en) {
when(addr_err) {
acc_err := true.B
status := s_idle
}.otherwise {
when(io.cpu.wen) {
awaddr := io.cpu.addr(31, 0)
awsize := Cat(false.B, io.cpu.size)
awvalid := true.B
wdata := io.cpu.wdata
wstrb := io.cpu.wstrb
wvalid := true.B
status := s_writeback
}.elsewhen(!io.cpu.tlb.translation_ok) {
when(io.cpu.tlb.tlb1_ok) {
state := s_idle
}.otherwise {
araddr := io.cpu.addr(31, 0)
arsize := Cat(false.B, io.cpu.size)
arvalid := true.B
rready := true.B
status := s_uncached
tlb_fill := true.B
}
// Uncached access: stores are pushed into write_fifo once; loads issue a single-beat read
// only after write_fifo is empty and no buffered write is in flight.
}.elsewhen(io.cpu.tlb.uncached) {
when(io.cpu.wen.orR) {
when(write_fifo.io.enq.ready && !current_mmio_write_saved) {
write_fifo.io.enq.valid := true.B
// When rlen is 2 the address is aligned down to a 4-byte boundary.
write_fifo.io.enq.bits.addr := Mux(
io.cpu.rlen === 2.U,
Cat(io.cpu.tlb.pa(31, 2), 0.U(2.W)),
io.cpu.tlb.pa
)
write_fifo.io.enq.bits.size := io.cpu.rlen
write_fifo.io.enq.bits.strb := io.cpu.wen
write_fifo.io.enq.bits.data := io.cpu.wdata
current_mmio_write_saved := true.B
}
when(io.cpu.dcache_ready && io.cpu.cpu_ready) {
current_mmio_write_saved := false.B
}
}.elsewhen(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) {
ar.addr := Mux(io.cpu.rlen === 2.U, Cat(io.cpu.tlb.pa(31, 2), 0.U(2.W)), io.cpu.tlb.pa)
ar.len := 0.U
ar.size := Cat(0.U(1.W), io.cpu.rlen)
arvalid := true.B
state := s_uncached
rready := true.B
} // when store buffer busy, read will stop at s_idle but stall pipeline.
// Cached access.
}.otherwise {
// Miss: record the set to replace and whether its LRU way is dirty, then enter s_replace.
when(!cache_hit) {
state := s_replace
axi_cnt.reset()
victim.set := pset
victim_cnt.reset()
read_ready_set := pset
read_ready_cnt := 0.U
victim.waddr := Cat(pset, 0.U(4.W))
victim.valid := true.B
victim.writeback := dirty(pset)(lru(pset))
}.otherwise {
when(io.cpu.dcache_ready) {
// update lru and mark dirty
// The LRU bit is set to the inverse of the way that hit.
lru(pset) := ~sel
when(io.cpu.wen.orR) {
dirty(pset)(sel) := true.B
}
when(!io.cpu.cpu_ready) {
saved_rdata := cache_data_forward(sel)
state := s_idle
}
}
}
}
// Fence: if any way of set fset is dirty, start a write-back once the write buffer has drained;
// otherwise clear both valid bits of that set.
}.elsewhen(io.cpu.fence) {
when(dirty(fset).contains(true.B)) {
when(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) {
state := s_writeback
axi_cnt.reset()
victim.set := fset
victim_cnt.reset()
read_ready_set := fset
read_ready_cnt := 0.U
victim.valid := true.B
}
}.otherwise {
when(valid(fset).contains(true.B)) {
valid(fset)(0) := false.B
valid(fset)(1) := false.B
}
state := s_idle
}
}
}
// Uncached read: drop arvalid once the address is accepted, then capture the read data and
// response status and return to idle.
is(s_uncached) {
when(io.axi.ar.ready && io.axi.ar.valid) {
when(arvalid && io.axi.ar.ready) {
arvalid := false.B
}
when(io.axi.r.valid) {
saved_rdata := io.axi.r.bits.data
acc_err := io.axi.r.bits.resp =/= RESP_OKEY.U
status := s_idle
state := s_idle
}
}
// Fence write-back of a dirty line of set fset. The way is indexed by dirty(fset)(1),
// i.e. way 1 when it is dirty and way 0 otherwise.
is(s_writeback) {
when(io.axi.aw.ready) {
awvalid := false.B
when(fence.working) {
// Walk the line through the data RAM, buffering each word that is read out.
when(victim_cnt.value =/= (burstSize - 1).U) {
victim_cnt.inc()
}
read_ready_set := victim.set
read_ready_cnt := victim_cnt.value
read_buffer(read_ready_cnt) := data(dirty(fset)(1))
// Issue the write burst request once (aw.len = 15) together with the first data beat.
when(!aw_handshake) {
aw.addr := Cat(tag(dirty(fset)(1)), fset, 0.U(6.W))
aw.len := 15.U
aw.size := 2.U(3.W)
awvalid := true.B
w.data := data(dirty(fset)(1))
w.strb := 15.U
w.last := false.B
wvalid := true.B
aw_handshake := true.B
}
when(io.axi.aw.fire) {
awvalid := false.B
}
when(io.axi.w.fire) {
when(w.last) {
wvalid := false.B
}.otherwise {
// The next beat comes straight from the RAM output when it is the word just read,
// otherwise from read_buffer.
w.data := Mux(
((axi_cnt.value + 1.U) === read_ready_cnt),
data(dirty(fset)(1)),
read_buffer(axi_cnt.value + 1.U)
)
axi_cnt.inc()
when(axi_cnt.value + 1.U === (burstSize - 1).U) {
w.last := true.B
}
}
}
// Write response: clear the dirty bit, record any error and return to idle.
when(io.axi.b.valid) {
dirty(fset)(dirty(fset)(1)) := false.B
fence.working := false.B
victim.valid := false.B
acc_err := io.axi.b.bits.resp =/= RESP_OKEY.U
state := s_idle
}
}.otherwise {
// First cycle in this state: arm the write-back and advance the word counter.
aw_handshake := false.B
fence.working := true.B
victim_cnt.inc()
}
when(io.axi.w.ready) {
wvalid := false.B
}
when(io.axi.b.valid) {
acc_err := io.axi.b.bits.resp =/= RESP_OKEY.U
status := s_idle
}
// Miss handling: waits until the write buffer has drained, then writes back the LRU way when
// victim.writeback is set and issues a line read (ar.len = 15) into that way.
is(s_replace) {
when(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) {
when(victim.working) {
when(victim.writeback) {
// Walk the victim line through the data RAM, buffering each word that is read out.
when(victim_cnt.value =/= (burstSize - 1).U) {
victim_cnt.inc()
}
read_ready_set := victim.set
read_ready_cnt := victim_cnt.value
read_buffer(read_ready_cnt) := data(lru(pset))
when(!aw_handshake) {
aw.addr := Cat(tag(lru(pset)), pset, 0.U(6.W))
aw.len := 15.U
aw.size := 2.U(3.W)
awvalid := true.B
aw_handshake := true.B
w.data := data(lru(pset))
w.strb := 15.U
w.last := false.B
wvalid := true.B
}
when(io.axi.aw.fire) {
awvalid := false.B
}
when(io.axi.w.fire) {
when(w.last) {
wvalid := false.B
}.otherwise {
w.data := Mux(
((axi_cnt.value + 1.U) === read_ready_cnt),
data(lru(pset)),
read_buffer(axi_cnt.value + 1.U)
)
axi_cnt.inc()
when(axi_cnt.value + 1.U === (burstSize - 1).U) {
w.last := true.B
}
}
}
when(io.axi.b.valid) {
dirty(pset)(lru(pset)) := false.B
victim.writeback := false.B
}
}
// Issue the refill read once; the tag write strobe stays high until the AR handshake.
when(!ar_handshake) {
ar.addr := Cat(io.cpu.tlb.pa(31, 6), 0.U(6.W))
ar.len := 15.U
ar.size := 2.U(3.W)
arvalid := true.B
rready := true.B
ar_handshake := true.B
victim.wstrb(lru(pset)) := 15.U
tag_wstrb(lru(pset)) := true.B
tag_wdata := io.cpu.tlb.pa(31, 12)
}
when(io.axi.ar.fire) {
tag_wstrb(lru(pset)) := false.B
arvalid := false.B
}
// Each read beat advances the data-RAM write address; the last beat ends the refill.
when(io.axi.r.fire) {
when(io.axi.r.bits.last) {
rready := false.B
victim.wstrb(lru(pset)) := 0.U
}.otherwise {
victim.waddr := victim.waddr + 1.U
}
}
// The replacement is complete when no write-back is pending (or its B response is arriving)
// and the last read beat is arriving (or rready has already been dropped).
when(
(!victim.writeback || io.axi.b.valid) && ((ar_handshake && io.axi.r.valid && io.axi.r.bits.last) || (ar_handshake && !rready))
) {
victim.valid := false.B
valid(pset)(lru(pset)) := true.B
}
when(!victim.valid) {
victim.working := false.B
state := s_idle
}
}.otherwise {
// First cycle with a drained write buffer: arm both bursts and advance the word counter.
ar_handshake := false.B
aw_handshake := false.B
victim.working := true.B
victim_cnt.inc()
}
}
}
}

View File

@ -0,0 +1,20 @@
package cache.mmu
import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
/** First-level data TLB placeholder.
  *
  * No translation is performed: the tag is taken from the address bits above the
  * 12-bit page offset, and every status output that signals success is tied high.
  * The `fill` and `dcache_is_idle` inputs of the `cache` port are not read here.
  */
class DTlbL1 extends Module {
  val io = IO(new Bundle {
    val cache = new Tlb_DCache()
    val addr  = Input(UInt(DATA_ADDR_WID.W))
  })

  // Low 12 bits pass through unchanged as the in-page offset.
  private val pageOffset = io.addr(11, 0)

  // Translation always reports success at every level.
  Seq(io.cache.translation_ok, io.cache.hit, io.cache.tlb1_ok).foreach(_ := true.B)

  // Cacheability is decided purely from the address range.
  io.cache.uncached := AddressSpace.isMMIO(io.addr)

  // The physical address is rebuilt from the (port-width) tag plus the offset.
  io.cache.tag := io.addr(XLEN - 1, 12)
  io.cache.pa  := Cat(io.cache.tag, pageOffset)
}

View File

@ -10,12 +10,10 @@ class ITlbL1 extends Module {
val addr = Input(UInt(PC_WID.W))
val cache = new Tlb_ICache()
})
val vpn = io.addr(31, 12)
val direct_mapped = io.addr(31, 30) === 2.U(2.W)
io.cache.uncached := AddressSpace.isMMIO(io.addr)
io.cache.translation_ok := true.B
io.cache.hit := true.B
io.cache.tag := io.addr(31, 12)
io.cache.tag := io.addr(XLEN - 1, 12)
io.cache.pa := Cat(io.cache.tag, io.addr(11, 0))
}

View File

@ -121,7 +121,7 @@ class Cache_ICache(implicit val config: CpuConfig) extends Bundle {
// cpu to dcache
class Cache_DCache extends Bundle {
val addr = Output(UInt(DATA_ADDR_WID.W))
val size = Output(UInt(2.W))
val rlen = Output(UInt(2.W))
val en = Output(Bool())
val wen = Output(Bool())
val wdata = Output(UInt(XLEN.W))
@ -132,6 +132,8 @@ class Cache_DCache extends Bundle {
val rdata = Input(UInt(XLEN.W))
val acc_err = Input(Bool())
val dcache_ready = Input(Bool())
val tlb = new Tlb_DCache()
}
// axi
@ -174,8 +176,8 @@ class W extends Bundle {
}
// Bundle carrying an `id` field (AXI_ID_WID bits) and a `resp` field (AXI_RESP_WID bits).
// NOTE(review): `id` and `resp` each appear twice below; this looks like the removed and the
// added lines of a whitespace-only change shown together — confirm against the real file.
class B extends Bundle {
val id = UInt(AXI_ID_WID.W)
val resp = UInt(AXI_RESP_WID.W)
val id = UInt(AXI_ID_WID.W)
val resp = UInt(AXI_RESP_WID.W)
}
class ICache_AXIInterface extends Bundle {

View File

@ -122,3 +122,15 @@ class Tlb_ICache extends Bundle {
val tag = Output(UInt(20.W))
val pa = Output(UInt(32.W))
}
/** Port between the data TLB and the data cache, declared from the TLB's point of view:
  * `Input` fields are received by the TLB, `Output` fields are produced by it.
  *
  * Field order is kept as-is because it determines the bundle's port/bit layout.
  */
class Tlb_DCache extends Bundle {
  // Received by the TLB side.
  val fill: Bool           = Input(Bool())
  val dcache_is_idle: Bool = Input(Bool())

  // Produced by the TLB side: cacheability and translation status flags.
  val uncached: Bool       = Output(Bool())
  val tlb1_ok: Bool        = Output(Bool())
  val translation_ok: Bool = Output(Bool())
  val hit: Bool            = Output(Bool())

  // Produced by the TLB side: 20-bit tag and 32-bit physical address.
  val tag: UInt = Output(UInt(20.W))
  val pa: UInt  = Output(UInt(32.W))
}