修改了部分前端设计,去除cache、tlb

This commit is contained in:
Liphen 2023-11-12 15:50:49 +08:00
parent e9a45b8c18
commit 8913ae5da0
18 changed files with 277 additions and 1209 deletions

@ -1 +1 @@
Subproject commit f41fe9897f5b0ed213e270ffdd2f8b179ef37a29
Subproject commit 1f6c6a632c18a0fd1daf6b1c09a8fa56717b7679

View File

@ -0,0 +1,98 @@
// Top-level wrapper around the RiscVTop core.
//
// Exposes the core's AXI master port under the MAXI_* prefix, passes the
// three interrupt inputs and the debug outputs straight through, and
// converts the active-high `reset` input into the core's `aresetn` input.
// No other logic lives here.
module top_axi_wrapper (
    input  wire        clock,
    input  wire        reset,
    // Interrupts
    input  wire        MEI,            // to PLIC
    input  wire        MSI,            // to CLINT
    input  wire        MTI,            // to CLINT
    // AXI write address channel (aw)
    output wire [3:0]  MAXI_awid,
    output wire [31:0] MAXI_awaddr,
    output wire [7:0]  MAXI_awlen,
    output wire [2:0]  MAXI_awsize,
    output wire [1:0]  MAXI_awburst,
    output wire        MAXI_awvalid,
    input  wire        MAXI_awready,
    // AXI write data channel (w)
    output wire [63:0] MAXI_wdata,
    output wire [7:0]  MAXI_wstrb,
    output wire        MAXI_wlast,
    output wire        MAXI_wvalid,
    input  wire        MAXI_wready,
    // AXI write response channel (b)
    input  wire [3:0]  MAXI_bid,
    input  wire [1:0]  MAXI_bresp,
    input  wire        MAXI_bvalid,
    output wire        MAXI_bready,
    // AXI read address channel (ar)
    output wire [3:0]  MAXI_arid,
    output wire [31:0] MAXI_araddr,
    output wire [7:0]  MAXI_arlen,
    output wire [2:0]  MAXI_arsize,
    output wire [1:0]  MAXI_arburst,
    output wire        MAXI_arvalid,
    input  wire        MAXI_arready,
    // AXI read data channel (r)
    input  wire [3:0]  MAXI_rid,
    input  wire [63:0] MAXI_rdata,
    input  wire [1:0]  MAXI_rresp,
    input  wire        MAXI_rlast,
    input  wire        MAXI_rvalid,
    output wire        MAXI_rready,
    // Debug outputs, forwarded unchanged from the core
    output wire        debug_commit,
    output wire [63:0] debug_pc,
    output wire [4:0]  debug_reg_num,
    output wire [63:0] debug_wdata
);

    // The wrapper's reset is active-high; the core's aresetn receives its inverse.
    wire core_aresetn = ~reset;

    RiscVTop core (
        .aclk          (clock),
        .aresetn       (core_aresetn),
        // Interrupts
        .MEI           (MEI),
        .MSI           (MSI),
        .MTI           (MTI),
        // aw
        .awid          (MAXI_awid),
        .awaddr        (MAXI_awaddr),
        .awlen         (MAXI_awlen),
        .awsize        (MAXI_awsize),
        .awburst       (MAXI_awburst),
        .awvalid       (MAXI_awvalid),
        .awready       (MAXI_awready),
        // w
        .wdata         (MAXI_wdata),
        .wstrb         (MAXI_wstrb),
        .wlast         (MAXI_wlast),
        .wvalid        (MAXI_wvalid),
        .wready        (MAXI_wready),
        // b
        .bid           (MAXI_bid),
        .bresp         (MAXI_bresp),
        .bvalid        (MAXI_bvalid),
        .bready        (MAXI_bready),
        // ar
        .arid          (MAXI_arid),
        .araddr        (MAXI_araddr),
        .arlen         (MAXI_arlen),
        .arsize        (MAXI_arsize),
        .arburst       (MAXI_arburst),
        .arvalid       (MAXI_arvalid),
        .arready       (MAXI_arready),
        // r
        .rid           (MAXI_rid),
        .rdata         (MAXI_rdata),
        .rresp         (MAXI_rresp),
        .rlast         (MAXI_rlast),
        .rvalid        (MAXI_rvalid),
        .rready        (MAXI_rready),
        // debug
        .debug_commit  (debug_commit),
        .debug_pc      (debug_pc),
        .debug_reg_num (debug_reg_num),
        .debug_wdata   (debug_wdata)
    );

endmodule

View File

@ -38,22 +38,6 @@ class Core(implicit val config: CpuConfig) extends Module {
val memoryUnit = Module(new MemoryUnit()).io
val writeBackStage = Module(new WriteBackStage()).io
val writeBackUnit = Module(new WriteBackUnit()).io
val tlbL1I = Module(new TlbL1I()).io
val tlbL1D = Module(new TlbL1D()).io
tlbL1I.addr := fetchUnit.iCache.pc
tlbL1I.fence := executeUnit.executeStage.inst0.inst_info.tlbfence
tlbL1I.cpu_stall := !ctrl.fetchUnit.allow_to_go
tlbL1I.icache_stall := io.inst.icache_stall
tlbL1I.cache <> io.inst.tlb
tlbL1D.addr := memoryUnit.dataMemory.out.addr
tlbL1D.fence := memoryUnit.memoryStage.inst0.inst_info.tlbfence
tlbL1D.cpu_stall := !ctrl.memoryUnit.allow_to_go
tlbL1D.dcache_stall := io.data.dcache_stall
tlbL1D.mem_write := memoryUnit.dataMemory.out.wen.orR
tlbL1D.mem_en := memoryUnit.dataMemory.out.en
tlbL1D.cache <> io.data.tlb
ctrl.instFifo.has2insts := !(instFifo.empty || instFifo.almost_empty)
ctrl.decoderUnit <> decoderUnit.ctrl
@ -92,16 +76,8 @@ class Core(implicit val config: CpuConfig) extends Module {
decoderUnit.bpu.branch_target := bpu.decoder.branch_target
instFifo.do_flush := ctrl.decoderUnit.do_flush
instFifo.flush_delay_slot := ctrl.instFifo.delay_slot_do_flush
instFifo.icache_stall := io.inst.icache_stall
instFifo.jump_branch_inst := decoderUnit.instFifo.jump_branch_inst
instFifo.delay_sel_flush := Mux(
ctrl.executeUnit.branch,
!(executeUnit.memoryStage.inst1.ex.bd || decoderUnit.executeStage.inst0.ex.bd),
Mux(ctrl.decoderUnit.branch, !decoderUnit.instFifo.allow_to_go(1), false.B),
)
instFifo.decoder_delay_flush := ctrl.decoderUnit.branch
instFifo.execute_delay_flush := ctrl.executeUnit.branch
instFifo.ren <> decoderUnit.instFifo.allow_to_go
decoderUnit.instFifo.inst <> instFifo.read
@ -109,15 +85,12 @@ class Core(implicit val config: CpuConfig) extends Module {
instFifo.write(i).pht_index := bpu.instBuffer.pht_index(i)
bpu.instBuffer.pc(i) := instFifo.write(i).pc
instFifo.wen(i) := io.inst.inst_valid(i)
instFifo.write(i).tlb.refill := tlbL1I.tlb1.refill
instFifo.write(i).tlb.invalid := tlbL1I.tlb1.invalid
instFifo.write(i).pc := io.inst.addr(0) + (i * 4).U
instFifo.write(i).inst := io.inst.inst(i)
}
decoderUnit.instFifo.info.empty := instFifo.empty
decoderUnit.instFifo.info.almost_empty := instFifo.almost_empty
decoderUnit.instFifo.info.inst0_is_in_delayslot := instFifo.inst0_is_in_delayslot
decoderUnit.regfile <> regfile.read
for (i <- 0 until (config.fuNum)) {
decoderUnit.forward(i).exe := executeUnit.decoderUnit.forward(i).exe
@ -142,13 +115,8 @@ class Core(implicit val config: CpuConfig) extends Module {
cp0.ctrl.exe_stall := !ctrl.executeUnit.allow_to_go
cp0.ctrl.mem_stall := !ctrl.memoryUnit.allow_to_go
cp0.tlb(0).vpn2 := tlbL1I.tlb2.vpn2
cp0.tlb(1).vpn2 := tlbL1D.tlb2.vpn2
cp0.ext_int := io.ext_int
tlbL1I.tlb2.found := cp0.tlb(0).found
tlbL1D.tlb2.found := cp0.tlb(1).found
tlbL1I.tlb2.entry := cp0.tlb(0).info
tlbL1D.tlb2.entry := cp0.tlb(1).info
memoryStage.ctrl.allow_to_go := ctrl.memoryUnit.allow_to_go
memoryStage.ctrl.clear := ctrl.memoryUnit.do_flush
@ -157,7 +125,6 @@ class Core(implicit val config: CpuConfig) extends Module {
memoryUnit.cp0 <> cp0.memoryUnit
memoryUnit.writeBackStage <> writeBackStage.memoryUnit
memoryUnit.dataMemory.in.tlb <> tlbL1D.tlb1
memoryUnit.dataMemory.in.rdata := io.data.rdata
io.data.en := memoryUnit.dataMemory.out.en
io.data.rlen := memoryUnit.dataMemory.out.rlen

View File

@ -17,442 +17,12 @@ class WriteBufferUnit extends Bundle {
}
class DCache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Module {
val nway: Int = cacheConfig.nway
val nset: Int = cacheConfig.nset
val nbank: Int = cacheConfig.nbank
val bankWidthBits: Int = cacheConfig.bankWidthBits
val tagWidth: Int = cacheConfig.tagWidth
val burstSize: Int = cacheConfig.burstSize
val io = IO(new Bundle {
val cpu = Flipped(new Cache_DCache())
val axi = new DCache_AXIInterface()
val statistic = if (!config.build) Some(new DCacheStatistic()) else None
})
val tlb_fill = RegInit(false.B)
// * fsm * //
val s_idle :: s_uncached :: s_writeback :: s_replace :: s_save :: Nil = Enum(5)
val state = RegInit(s_idle)
io.cpu.tlb.fill := tlb_fill
io.cpu.tlb.dcache_is_idle := state === s_idle
io.cpu.tlb.dcache_is_save := state === s_save
// * valid dirty * //
val valid = RegInit(VecInit(Seq.fill(nset)(VecInit(Seq.fill(nway)(false.B)))))
val dirty = RegInit(VecInit(Seq.fill(nset)(VecInit(Seq.fill(nway)(false.B)))))
val lru = RegInit(VecInit(Seq.fill(nset)(0.U(1.W))))
val should_next_addr = (state === s_idle && !tlb_fill) || (state === s_save)
val write_fifo = Module(new Queue(new WriteBufferUnit(), 4))
write_fifo.io.enq.valid := false.B
write_fifo.io.enq.bits := 0.U.asTypeOf(new WriteBufferUnit())
write_fifo.io.deq.ready := false.B
val axi_cnt = Counter(burstSize)
val read_ready_cnt = RegInit(0.U(4.W))
val read_ready_set = RegInit(0.U(6.W))
// * victim cache * //
val victim = RegInit(0.U.asTypeOf(new Bundle {
val valid = Bool()
val set = UInt(6.W)
val waddr = UInt(10.W)
val wstrb = Vec(nway, UInt(4.W))
val working = Bool()
val writeback = Bool()
}))
val victim_cnt = Counter(burstSize)
val victim_addr = Cat(victim.set, victim_cnt.value)
val fset = io.cpu.fence_addr(11, 6)
val fence = RegInit(0.U.asTypeOf(new Bundle {
val working = Bool()
}))
val read_buffer = RegInit(VecInit(Seq.fill(16)(0.U(DATA_WID.W))))
val ar_handshake = RegInit(false.B)
val aw_handshake = RegInit(false.B)
val data_raddr = Mux(victim.valid, victim_addr, Mux(should_next_addr, io.cpu.execute_addr(11, 2), io.cpu.addr(11, 2)))
val data_wstrb = Wire(Vec(nway, UInt(4.W)))
val data_waddr = Mux(victim.valid, victim.waddr, io.cpu.addr(11, 2))
val data_wdata = Mux(state === s_replace, io.axi.r.bits.data, io.cpu.wdata)
val tag_raddr = Mux(victim.valid, victim.set, Mux(should_next_addr, io.cpu.execute_addr(11, 6), io.cpu.addr(11, 6)))
val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B)))
val tag_wdata = RegInit(0.U(tagWidth.W))
val data = Wire(Vec(nway, UInt(DATA_WID.W)))
val tag = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W))))
val tag_compare_valid = Wire(Vec(nway, Bool()))
val cache_hit = tag_compare_valid.contains(true.B)
val mmio_read_stall = io.cpu.tlb.uncached && !io.cpu.wen.orR
val mmio_write_stall = io.cpu.tlb.uncached && io.cpu.wen.orR && !write_fifo.io.enq.ready
val cached_stall = !io.cpu.tlb.uncached && !cache_hit
val sel = tag_compare_valid(1)
// * physical set * //
val pset = io.cpu.addr(11, 6)
io.cpu.dcache_stall := Mux(
state === s_idle && !tlb_fill,
Mux(io.cpu.en, (cached_stall || mmio_read_stall || mmio_write_stall || !io.cpu.tlb.translation_ok), io.cpu.fence),
state =/= s_save,
)
val saved_rdata = RegInit(0.U(DATA_WID.W))
// forward last stored data in data bram
val last_waddr = RegNext(data_waddr)
val last_wstrb = RegInit(VecInit(Seq.fill(nway)(0.U(DATA_WID.W))))
val last_wdata = RegNext(data_wdata)
val cache_data_forward = Wire(Vec(nway, UInt(DATA_WID.W)))
io.cpu.rdata := Mux(state === s_save, saved_rdata, cache_data_forward(sel))
// bank tagv ram
for { i <- 0 until nway } {
val bank_ram = Module(new SimpleDualPortRam(nset * nbank, bankWidthBits, byteAddressable = true))
bank_ram.io.ren := true.B
bank_ram.io.raddr := data_raddr
data(i) := bank_ram.io.rdata
bank_ram.io.wen := data_wstrb(i).orR
bank_ram.io.waddr := data_waddr
bank_ram.io.wdata := data_wdata
bank_ram.io.wstrb := data_wstrb(i)
val tag_ram = Module(new LUTRam(nset, tagWidth))
tag_ram.io.raddr := tag_raddr
tag(i) := tag_ram.io.rdata
tag_ram.io.wen := tag_wstrb(i)
tag_ram.io.waddr := victim.set
tag_ram.io.wdata := tag_wdata
tag_compare_valid(i) := tag(i) === io.cpu.tlb.tag && valid(pset)(i) && io.cpu.tlb.translation_ok
cache_data_forward(i) := Mux(
last_waddr === io.cpu.addr(11, 2),
((last_wstrb(i) & last_wdata) | (data(i) & (~last_wstrb(i)))),
data(i),
)
data_wstrb(i) := Mux(
tag_compare_valid(i) && io.cpu.en && io.cpu.wen.orR && !io.cpu.tlb.uncached && state === s_idle && !tlb_fill,
io.cpu.wen,
victim.wstrb(i),
)
last_wstrb(i) := Cat(
Fill(8, data_wstrb(i)(3)),
Fill(8, data_wstrb(i)(2)),
Fill(8, data_wstrb(i)(1)),
Fill(8, data_wstrb(i)(0)),
)
}
val write_buffer_axi_busy = RegInit(false.B)
val ar = RegInit(0.U.asTypeOf(new AR()))
val arvalid = RegInit(false.B)
io.axi.ar.bits <> ar
io.axi.ar.valid := arvalid
val rready = RegInit(false.B)
io.axi.r.ready := rready
val aw = RegInit(0.U.asTypeOf(new AW()))
val awvalid = RegInit(false.B)
io.axi.aw.bits <> aw
io.axi.aw.valid := awvalid
val w = RegInit(0.U.asTypeOf(new W()))
val wvalid = RegInit(false.B)
io.axi.w.bits <> w
io.axi.w.valid := wvalid
io.axi.b.ready := true.B
val current_mmio_write_saved = RegInit(false.B)
// write buffer
when(write_buffer_axi_busy) { // To implement SC memory ordering, when store buffer busy, axi is unseable.
when(io.axi.aw.fire) {
awvalid := false.B
}
when(io.axi.w.fire) {
wvalid := false.B
w.last := false.B
}
when(io.axi.b.fire) {
write_buffer_axi_busy := false.B
}
}.elsewhen(write_fifo.io.deq.valid) {
write_fifo.io.deq.ready := write_fifo.io.deq.valid
when(write_fifo.io.deq.fire) {
aw.addr := write_fifo.io.deq.bits.addr
aw.size := Cat(0.U(1.W), write_fifo.io.deq.bits.size)
w.data := write_fifo.io.deq.bits.data
w.strb := write_fifo.io.deq.bits.strb
}
aw.len := 0.U
awvalid := true.B
w.last := true.B
wvalid := true.B
write_buffer_axi_busy := true.B
}
switch(state) {
is(s_idle) {
when(tlb_fill) {
tlb_fill := false.B
when(!io.cpu.tlb.hit) {
state := s_save
}
}.elsewhen(io.cpu.en) {
when(!io.cpu.tlb.translation_ok) {
when(io.cpu.tlb.tlb1_ok) {
state := s_save
}.otherwise {
tlb_fill := true.B
}
}.elsewhen(io.cpu.tlb.uncached) {
when(io.cpu.wen.orR) {
when(write_fifo.io.enq.ready && !current_mmio_write_saved) {
write_fifo.io.enq.valid := true.B
write_fifo.io.enq.bits.addr := Mux(
io.cpu.rlen === 2.U,
Cat(io.cpu.tlb.pa(31, 2), 0.U(2.W)),
io.cpu.tlb.pa,
)
write_fifo.io.enq.bits.size := io.cpu.rlen
write_fifo.io.enq.bits.strb := io.cpu.wen
write_fifo.io.enq.bits.data := io.cpu.wdata
current_mmio_write_saved := true.B
}
when(!io.cpu.dcache_stall && !io.cpu.cpu_stall) {
current_mmio_write_saved := false.B
}
}.elsewhen(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) {
ar.addr := Mux(io.cpu.rlen === 2.U, Cat(io.cpu.tlb.pa(31, 2), 0.U(2.W)), io.cpu.tlb.pa)
ar.len := 0.U
ar.size := Cat(0.U(1.W), io.cpu.rlen)
arvalid := true.B
state := s_uncached
rready := true.B
} // when store buffer busy, read will stop at s_idle but stall pipeline.
}.otherwise {
when(!cache_hit) {
state := s_replace
axi_cnt.reset()
victim.set := pset
victim_cnt.reset()
read_ready_set := pset
read_ready_cnt := 0.U
victim.waddr := Cat(pset, 0.U(4.W))
victim.valid := true.B
victim.writeback := dirty(pset)(lru(pset))
}.otherwise {
when(!io.cpu.dcache_stall) {
// update lru and mark dirty
lru(pset) := ~sel
when(io.cpu.wen.orR) {
dirty(pset)(sel) := true.B
}
when(io.cpu.cpu_stall) {
saved_rdata := cache_data_forward(sel)
state := s_save
}
}
}
}
}.elsewhen(io.cpu.fence) {
when(dirty(fset).contains(true.B)) {
when(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) {
state := s_writeback
axi_cnt.reset()
victim.set := fset
victim_cnt.reset()
read_ready_set := fset
read_ready_cnt := 0.U
victim.valid := true.B
}
}.otherwise {
when(valid(fset).contains(true.B)) {
valid(fset)(0) := false.B
valid(fset)(1) := false.B
}
state := s_save
}
}
}
is(s_uncached) {
when(arvalid && io.axi.ar.ready) {
arvalid := false.B
}
when(io.axi.r.valid) {
saved_rdata := io.axi.r.bits.data
state := s_save
}
}
is(s_writeback) {
when(fence.working) {
when(victim_cnt.value =/= (burstSize - 1).U) {
victim_cnt.inc()
}
read_ready_set := victim.set
read_ready_cnt := victim_cnt.value
read_buffer(read_ready_cnt) := data(dirty(fset)(1))
when(!aw_handshake) {
aw.addr := Cat(tag(dirty(fset)(1)), fset, 0.U(6.W))
aw.len := 15.U
aw.size := 2.U(3.W)
awvalid := true.B
w.data := data(dirty(fset)(1))
w.strb := 15.U
w.last := false.B
wvalid := true.B
aw_handshake := true.B
}
when(io.axi.aw.fire) {
awvalid := false.B
}
when(io.axi.w.fire) {
when(w.last) {
wvalid := false.B
}.otherwise {
w.data := Mux(
((axi_cnt.value + 1.U) === read_ready_cnt),
data(dirty(fset)(1)),
read_buffer(axi_cnt.value + 1.U),
)
axi_cnt.inc()
when(axi_cnt.value + 1.U === (burstSize - 1).U) {
w.last := true.B
}
}
}
when(io.axi.b.valid) {
dirty(fset)(dirty(fset)(1)) := false.B
fence.working := false.B
victim.valid := false.B
state := s_idle
}
}.otherwise {
aw_handshake := false.B
fence.working := true.B
victim_cnt.inc()
}
}
is(s_replace) {
when(!(write_fifo.io.deq.valid || write_buffer_axi_busy)) {
when(victim.working) {
when(victim.writeback) {
when(victim_cnt.value =/= (burstSize - 1).U) {
victim_cnt.inc()
}
read_ready_set := victim.set
read_ready_cnt := victim_cnt.value
read_buffer(read_ready_cnt) := data(lru(pset))
when(!aw_handshake) {
aw.addr := Cat(tag(lru(pset)), pset, 0.U(6.W))
aw.len := 15.U
aw.size := 2.U(3.W)
awvalid := true.B
aw_handshake := true.B
w.data := data(lru(pset))
w.strb := 15.U
w.last := false.B
wvalid := true.B
}
when(io.axi.aw.fire) {
awvalid := false.B
}
when(io.axi.w.fire) {
when(w.last) {
wvalid := false.B
}.otherwise {
w.data := Mux(
((axi_cnt.value + 1.U) === read_ready_cnt),
data(lru(pset)),
read_buffer(axi_cnt.value + 1.U),
)
axi_cnt.inc()
when(axi_cnt.value + 1.U === (burstSize - 1).U) {
w.last := true.B
}
}
}
when(io.axi.b.valid) {
dirty(pset)(lru(pset)) := false.B
victim.writeback := false.B
}
}
when(!ar_handshake) {
ar.addr := Cat(io.cpu.tlb.pa(31, 6), 0.U(6.W))
ar.len := 15.U
ar.size := 2.U(3.W)
arvalid := true.B
rready := true.B
ar_handshake := true.B
victim.wstrb(lru(pset)) := 15.U
tag_wstrb(lru(pset)) := true.B
tag_wdata := io.cpu.tlb.pa(31, 12)
}
when(io.axi.ar.fire) {
tag_wstrb(lru(pset)) := false.B
arvalid := false.B
}
when(io.axi.r.fire) {
when(io.axi.r.bits.last) {
rready := false.B
victim.wstrb(lru(pset)) := 0.U
}.otherwise {
victim.waddr := victim.waddr + 1.U
}
}
when(
(!victim.writeback || io.axi.b.valid) && ((ar_handshake && io.axi.r.valid && io.axi.r.bits.last) || (ar_handshake && !rready)),
) {
victim.valid := false.B
valid(pset)(lru(pset)) := true.B
}
when(!victim.valid) {
victim.working := false.B
state := s_idle
}
}.otherwise {
ar_handshake := false.B
aw_handshake := false.B
victim.working := true.B
victim_cnt.inc()
}
}
}
is(s_save) {
when(!io.cpu.dcache_stall && !io.cpu.cpu_stall) {
state := s_idle
}
}
}
// ===----------------------------------------------------------------===
// statistic
// ===----------------------------------------------------------------===
val req_cnt = RegInit(0.U(32.W))
when(io.cpu.en) {
req_cnt := req_cnt + 1.U
}
val hit_cnt = RegInit(0.U(32.W))
when(cache_hit) {
hit_cnt := hit_cnt + 1.U
}
if (!config.build) {
io.statistic.get.request := req_cnt
io.statistic.get.hit := hit_cnt
}
val s_idle :: s_read :: s_write :: s_finishwait :: Nil = Enum(4)
val state = RegInit(s_idle)
}

View File

@ -4,246 +4,66 @@ package cache
import chisel3._
import chisel3.util._
import memory._
import cpu.CacheConfig
import cpu.defines._
import cpu.CpuConfig
import cpu.defines.Const._
class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Module {
val nway: Int = cacheConfig.nway
val nset: Int = cacheConfig.nset
val nbank: Int = cacheConfig.nbank
val ninst: Int = cacheConfig.ninst // 取指令的数量
val bankOffsetWidth: Int = cacheConfig.bankOffsetWidth
val bankWidth: Int = cacheConfig.bankWidth
val tagWidth: Int = cacheConfig.tagWidth
val indexWidth: Int = cacheConfig.indexWidth
val offsetWidth: Int = cacheConfig.offsetWidth
class ICache(implicit config: CpuConfig) extends Module {
val io = IO(new Bundle {
val cpu = Flipped(new Cache_ICache())
val axi = new ICache_AXIInterface()
val statistic = if (!config.build) Some(new ICacheStatistic()) else None
val cpu = Flipped(new Cache_ICache())
val axi = new ICache_AXIInterface()
})
require(isPow2(ninst), "ninst must be power of 2")
// * addr organization * //
// ======================================
// | tag | index |offset|
// |31 12|11 6|5 0|
// ======================================
// | offset |
// | bank index | bank offset |
// | 5 4 | 3 2 |
// ============================
val tlb_fill = RegInit(false.B)
// * fsm * //
val s_idle :: s_uncached :: s_replace :: s_save :: Nil = Enum(4)
val state = RegInit(s_idle)
val s_idle :: s_read :: s_finishwait :: Nil = Enum(3)
val status = RegInit(s_idle)
// * nway * nset * //
// * 128 bit for 4 inst * //
// =========================================================
// | valid | tag | bank 0 | bank 1 | bank 2 | bank 3 |
// | 1 | 20 | 128 | 128 | 128 | 128 |
// =========================================================
// | bank |
// | inst 0 | inst 1 | inst 2 | inst 3 |
// | 32 | 32 | 32 | 32 |
// =====================================
val instperbank = bankWidth / 4 // 每个bank存储的指令数
val valid = RegInit(VecInit(Seq.fill(nset * nbank)(VecInit(Seq.fill(instperbank)(false.B)))))
io.cpu.valid := status === s_finishwait
val addr_err = io.cpu.addr.orR
val data = Wire(Vec(nway, Vec(instperbank, UInt(DATA_WID.W))))
val tag = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W))))
io.axi.ar.addr := 0.U
io.axi.ar.len := 0.U
io.axi.ar.size := 2.U
io.axi.ar.burst := BURST_FIXED.U
io.axi.ar.valid := false.B
io.axi.r.ready := true.B
io.cpu.rdata := 0.U
io.cpu.acc_err := false.B
// * should choose next addr * //
val should_next_addr = (state === s_idle && !tlb_fill) || (state === s_save)
val data_raddr = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, bankOffsetWidth)
val data_wstrb = RegInit(VecInit(Seq.fill(nway)(VecInit(Seq.fill(instperbank)(0.U(4.W))))))
val tag_raddr = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, offsetWidth)
val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B)))
val tag_wdata = RegInit(0.U(tagWidth.W))
// * lru * //
val lru = RegInit(VecInit(Seq.fill(nset * nbank)(false.B)))
// * itlb * //
when(tlb_fill) { tlb_fill := false.B }
io.cpu.tlb.fill := tlb_fill
io.cpu.tlb.icache_is_save := (state === s_save)
// * fence * //
val fence_index = io.cpu.fence_addr(indexWidth + offsetWidth - 1, offsetWidth)
when(io.cpu.fence && !io.cpu.icache_stall && !io.cpu.cpu_stall) {
valid(fence_index) := VecInit(Seq.fill(instperbank)(false.B))
}
// * replace set * //
val rset = RegInit(0.U(6.W))
// * virtual set * //
val vset = io.cpu.addr(0)(indexWidth + offsetWidth - 1, offsetWidth)
// * cache hit * //
val tag_compare_valid = VecInit(Seq.tabulate(nway)(i => tag(i) === io.cpu.tlb.tag && valid(vset)(i)))
val cache_hit = tag_compare_valid.contains(true.B)
val cache_hit_available = cache_hit && io.cpu.tlb.translation_ok && !io.cpu.tlb.uncached
val sel = tag_compare_valid(1)
val bank_offset = io.cpu.addr(0)(log2Ceil(instperbank) + 1, 2)
val inst = VecInit(Seq.tabulate(instperbank)(i => Mux(i.U <= (3.U - bank_offset), data(sel)(i.U + bank_offset), 0.U)))
val inst_valid = VecInit(Seq.tabulate(instperbank)(i => cache_hit_available && i.U <= (3.U - bank_offset)))
val saved = RegInit(VecInit(Seq.fill(instperbank)(0.U.asTypeOf(new Bundle {
val inst = UInt(PC_WID.W)
val valid = Bool()
}))))
val axi_cnt = Counter(cacheConfig.burstSize)
// bank tag ram
for { i <- 0 until nway; j <- 0 until instperbank } {
val bank = Module(new SimpleDualPortRam(nset * nbank, DATA_WID, byteAddressable = true))
bank.io.ren := true.B
bank.io.raddr := data_raddr
data(i)(j) := bank.io.rdata
bank.io.wen := data_wstrb(i)(j).orR
bank.io.waddr := Cat(rset, axi_cnt.value(log2Ceil(cacheConfig.burstSize) - 1, log2Ceil(instperbank)))
bank.io.wdata := Mux(j.U === axi_cnt.value(log2Ceil(instperbank) - 1, 0), io.axi.r.bits.data, 0.U)
bank.io.wstrb := data_wstrb(i)(j)
}
for { i <- 0 until ninst } {
io.cpu.inst_valid(i) := Mux(state === s_idle && !tlb_fill, inst_valid(i), saved(i).valid) && io.cpu.req
io.cpu.inst(i) := Mux(state === s_idle && !tlb_fill, inst(i), saved(i).inst)
}
for { i <- 0 until nway } {
val tag_bram = Module(new LUTRam(nset, tagWidth))
tag_bram.io.raddr := tag_raddr
tag(i) := tag_bram.io.rdata
tag_bram.io.wen := tag_wstrb(i)
tag_bram.io.waddr := rset
tag_bram.io.wdata := tag_wdata
}
io.cpu.icache_stall := Mux(state === s_idle && !tlb_fill, (!cache_hit_available && io.cpu.req), state =/= s_save)
val ar = RegInit(0.U.asTypeOf(new AR()))
val arvalid = RegInit(false.B)
ar <> io.axi.ar.bits
arvalid <> io.axi.ar.valid
val r = RegInit(0.U.asTypeOf(new R()))
val rready = RegInit(false.B)
r <> io.axi.r.bits
rready <> io.axi.r.ready
when(tlb_fill === true.B) {
tlb_fill := false.B
}
switch(state) {
switch(status) {
is(s_idle) {
when(tlb_fill) {
when(!io.cpu.tlb.hit) {
state := s_save
saved(0).inst := 0.U
saved(0).valid := true.B
}
}.elsewhen(io.cpu.req) {
when(!io.cpu.tlb.translation_ok) {
tlb_fill := true.B
}.elsewhen(io.cpu.tlb.uncached) {
state := s_uncached
ar.addr := io.cpu.tlb.pa
ar.len := 0.U(log2Ceil((nbank * bankWidth) / 4).W)
ar.size := 2.U(bankOffsetWidth.W)
arvalid := true.B
}.elsewhen(!cache_hit) {
state := s_replace
ar.addr := Cat(io.cpu.tlb.pa(31, 6), 0.U(6.W))
ar.len := 15.U(log2Ceil((nbank * bankWidth) / 4).W)
ar.size := 2.U(bankOffsetWidth.W)
arvalid := true.B
rset := vset
(0 until instperbank).foreach(i => data_wstrb(lru(vset))(i) := Mux(i.U === 0.U, 0xf.U, 0x0.U))
tag_wstrb(lru(vset)) := true.B
tag_wdata := io.cpu.tlb.tag
valid(vset)(lru(vset)) := true.B
axi_cnt.reset()
}.elsewhen(!io.cpu.icache_stall) {
lru(vset) := ~sel
when(io.cpu.cpu_stall) {
state := s_save
(1 until instperbank).foreach(i => saved(i).inst := data(sel)(i))
(0 until instperbank).foreach(i => saved(i).valid := inst_valid(i))
when(io.cpu.en) {
io.cpu.acc_err := true.B
status := s_finishwait
}.otherwise {
io.axi.ar.addr := Cat(io.cpu.addr(31, 2), 0.U(2.W))
io.axi.ar.valid := true.B
status := s_read
}
}
is(s_read) {
when(io.axi.ar.ready) {
io.axi.ar.valid := false.B
}
when(io.axi.r.valid) {
io.cpu.rdata := Mux(io.axi.ar.addr(2), io.axi.r.data(63, 32), io.axi.r.data(31, 0))
io.cpu.acc_err := io.axi.r.resp =/= RESP_OKEY.U
status := s_finishwait
}
}
is(s_finishwait) {
when(io.cpu.ready) {
io.cpu.acc_err := false.B
when(io.cpu.en) {
when(addr_err) {
io.cpu.acc_err := true.B
status := s_finishwait
}.otherwise {
io.axi.ar.addr := Cat(io.cpu.addr(31, 2), 0.U(2.W))
io.axi.ar.valid := true.B
status := s_read
}
}
}
}
is(s_uncached) {
when(io.axi.ar.valid) {
when(io.axi.ar.ready) {
arvalid := false.B
rready := true.B
}
}.elsewhen(io.axi.r.fire) {
// * uncached not support burst transport * //
state := s_save
saved(0).inst := io.axi.r.bits.data
saved(0).valid := true.B
rready := false.B
}
}
is(s_replace) {
when(io.axi.ar.valid) {
when(io.axi.ar.ready) {
arvalid := false.B
rready := true.B
}
}.elsewhen(io.axi.r.fire) {
// * burst transport * //
when(!io.axi.r.bits.last) {
axi_cnt.inc()
data_wstrb(lru(vset))(0) := data_wstrb(lru(vset))(instperbank - 1)
(1 until instperbank).foreach(i => data_wstrb(lru(vset))(i) := data_wstrb(lru(vset))(i - 1))
}.otherwise {
rready := false.B
data_wstrb(lru(vset)) := 0.U.asTypeOf(Vec(instperbank, UInt(4.W)))
tag_wstrb(lru(vset)) := false.B
}
}.elsewhen(!io.axi.r.ready) {
state := s_idle
}
}
is(s_save) {
when(!io.cpu.cpu_stall && !io.cpu.icache_stall) {
state := s_idle
(0 until instperbank).foreach(i => saved(i).valid := false.B)
}
}
}
// ===----------------------------------------------------------------===
// statistic
// ===----------------------------------------------------------------===
val req_cnt = RegInit(0.U(32.W))
when(io.cpu.req) {
req_cnt := req_cnt + 1.U
}
val hit_cnt = RegInit(0.U(32.W))
when(io.cpu.req && cache_hit) {
hit_cnt := hit_cnt + 1.U
}
if (!config.build) {
io.statistic.get.request := req_cnt
io.statistic.get.hit := hit_cnt
}
}

View File

@ -39,8 +39,6 @@ class Ctrl(implicit val config: CpuConfig) extends Module {
io.memoryUnit.do_flush := io.memoryUnit.flush_req
io.writeBackUnit.do_flush := false.B
io.instFifo.delay_slot_do_flush := io.memoryUnit.flush_req
io.executeUnit.fu.do_flush := io.memoryUnit.do_flush
io.executeUnit.fu.eret := io.memoryUnit.eret
io.executeUnit.fu.allow_to_go := io.memoryUnit.allow_to_go

View File

@ -5,23 +5,12 @@ import chisel3.util._
import cpu.defines.Const._
import cpu.CpuConfig
/** Contents of a single TLB entry.
  *
  * Scalar widths come from the `Const` width constants. Each `Vec(2, ...)`
  * field holds one value per half of the entry.
  * NOTE(review): the field names match a MIPS-style paired-page TLB entry
  * (EntryHi plus EntryLo0/EntryLo1) — confirm against the CP0 implementation.
  */
class TlbEntry extends Bundle {
val vpn2 = UInt(VPN2_WID.W) // virtual page number field, VPN2_WID bits
val asid = UInt(ASID_WID.W) // address-space identifier, ASID_WID bits
val g = Bool() // presumably the "global" bit — TODO confirm
val pfn = Vec(2, UInt(PFN_WID.W)) // two physical frame numbers, PFN_WID bits each
val c = Vec(2, Bool()) // per-half flag; presumably cacheability — TODO confirm
val d = Vec(2, Bool()) // per-half flag; presumably dirty/writable — TODO confirm
val v = Vec(2, Bool()) // per-half flag; presumably valid — TODO confirm
}
class ExceptionInfo extends Bundle {
val flush_req = Bool()
val tlb_refill = Bool()
val eret = Bool()
val badvaddr = UInt(PC_WID.W)
val bd = Bool()
val excode = UInt(EXCODE_WID.W)
val flush_req = Bool()
val eret = Bool()
val badvaddr = UInt(PC_WID.W)
val bd = Bool()
val excode = UInt(EXCODE_WID.W)
}
class SrcInfo extends Bundle {
@ -54,7 +43,6 @@ class InstInfo extends Bundle {
val branch_link = Bool()
val ifence = Bool()
val dfence = Bool()
val tlbfence = Bool()
val mem_addr = UInt(DATA_ADDR_WID.W)
val mem_wreg = Bool()
val inst = UInt(INST_WID.W)
@ -81,8 +69,6 @@ class FetchUnitCtrl extends Bundle {
}
/** Control-unit port for the instruction FIFO: one flush request in one
  * direction and one occupancy flag in the other.
  *
  * NOTE(review): which side sees these as Input/Output depends on whether
  * the port is instantiated `Flipped` — confirm at the use site.
  */
class InstFifoCtrl extends Bundle {
val delay_slot_do_flush = Input(Bool()) // flush request for the delay-slot instruction
val has2insts = Output(Bool()) // presumably: the FIFO holds at least two instructions — TODO confirm
}
@ -127,65 +113,18 @@ class WriteBackCtrl extends Bundle {
val do_flush = Input(Bool())
}
/** Two-flag status record reported by a first-level TLB lookup.
  *
  * Undirected bundle: direction is decided where it is instantiated.
  */
class Tlb1InfoI extends Bundle {
val invalid = Bool() // presumably: a matching entry exists but is not valid — TODO confirm
val refill = Bool() // presumably: no matching entry, a refill is required — TODO confirm
}
/** Extension of `Tlb1InfoI` that adds a `modify` flag on top of the
  * inherited `invalid` and `refill` flags.
  */
class Tlb1InfoD extends Tlb1InfoI {
val modify = Bool() // presumably: write to a page that is not writable — TODO confirm
}
/** Lookup port for a full TLB search: a VPN2 key is presented and a hit flag
  * plus the matching entry are returned.
  *
  * Directions are written from the side that answers the lookup (key is an
  * Input, results are Outputs).
  */
class Tlb2Info extends Bundle {
val vpn2 = Input(UInt(19.W)) // lookup key; width is hard-coded to 19 rather than VPN2_WID
val found = Output(Bool()) // lookup result flag
val entry = Output(new TlbEntry()) // entry returned for the lookup
}
/** Port between the instruction-side TLB and the instruction cache.
  *
  * Directions are written from the TLB's side: the two Inputs are status
  * signals supplied by the cache, the Outputs are the translation result
  * handed to the cache.
  */
class Tlb_ICache extends Bundle {
val fill = Input(Bool()) // cache-side request/flag for a TLB fill
val icache_is_save = Input(Bool()) // cache reports it is in its "save" state
val uncached = Output(Bool()) // translated address must bypass the cache
val translation_ok = Output(Bool()) // translation result is usable
val hit = Output(Bool()) // TLB lookup hit
val tag = Output(UInt(20.W)) // 20-bit physical tag for the cache tag compare
val pa = Output(UInt(32.W)) // 32-bit physical address
}
/** Port between the data-side TLB and the data cache.
  *
  * Directions are written from the TLB's side: the Inputs are status signals
  * supplied by the cache, the Outputs are the translation result handed to
  * the cache.
  */
class Tlb_DCache extends Bundle {
val fill = Input(Bool()) // cache-side request/flag for a TLB fill
val dcache_is_idle = Input(Bool()) // cache reports it is in its idle state
val dcache_is_save = Input(Bool()) // cache reports it is in its "save" state
val uncached = Output(Bool()) // translated address must bypass the cache
val tlb1_ok = Output(Bool()) // presumably: first-level lookup already resolved — TODO confirm
val translation_ok = Output(Bool()) // translation result is usable
val hit = Output(Bool()) // TLB lookup hit
val tag = Output(UInt(20.W)) // 20-bit physical tag for the cache tag compare
val pa = Output(UInt(32.W)) // 32-bit physical address
}
// cpu to icache
class Cache_ICache(implicit
val config: CpuConfig,
) extends Bundle {
class Cache_ICache(implicit val config: CpuConfig) extends Bundle {
// read inst request from cpu
val req = Output(Bool())
val addr = Output(Vec(config.instFetchNum, UInt(32.W))) // virtual address and next virtual address
val en = Output(Bool())
val ready = Output(Bool())
val addr = Output(UInt(INST_ADDR_WID.W)) // virtual address and next virtual address
val fence = Output(Bool())
// read inst result
val inst = Input(Vec(config.instFetchNum, UInt(32.W)))
val inst_valid = Input(Vec(config.instFetchNum, Bool()))
// control
val cpu_stall = Output(Bool())
val icache_stall = Input(Bool())
val tlb = new Tlb_ICache()
val fence = Output(Bool())
val fence_addr = Output(UInt(32.W))
val rdata = Input(UInt(INST_WID.W))
val valid = Input(Bool())
val acc_err = Input(Bool())
}
// cpu to dcache
@ -202,41 +141,72 @@ class Cache_DCache extends Bundle {
val wdata = Output(UInt(32.W))
val addr = Output(UInt(32.W))
val tlb = new Tlb_DCache()
val fence = Output(Bool())
val fence_addr = Output(UInt(32.W))
}
// axi
// master
// master -> slave
class AR extends Bundle {
val addr = UInt(32.W)
val len = UInt(8.W)
val size = UInt(3.W)
val id = Output(UInt(4.W))
val addr = Output(UInt(32.W))
val len = Output(UInt(8.W))
val size = Output(UInt(3.W))
val burst = Output(UInt(2.W))
val lock = Output(UInt(2.W))
val cache = Output(UInt(4.W))
val prot = Output(UInt(3.W))
val valid = Output(Bool())
val ready = Input(Bool())
}
class R extends Bundle {
val data = UInt(32.W)
val last = Bool()
val ready = Output(Bool())
val id = Input(UInt(4.W))
val data = Input(UInt(32.W))
val resp = Input(UInt(2.W))
val last = Input(Bool())
val valid = Input(Bool())
}
class AW extends Bundle {
val addr = UInt(32.W)
val len = UInt(8.W)
val size = UInt(3.W)
val id = Output(UInt(4.W))
val addr = Output(UInt(32.W))
val len = Output(UInt(8.W))
val size = Output(UInt(3.W))
val burst = Output(UInt(2.W))
val lock = Output(UInt(2.W))
val cache = Output(UInt(4.W))
val prot = Output(UInt(3.W))
val valid = Output(Bool())
val ready = Input(Bool())
}
class W extends Bundle {
val data = UInt(32.W)
val strb = UInt(4.W)
val last = Bool()
val id = Output(UInt(4.W))
val data = Output(UInt(32.W))
val strb = Output(UInt(4.W))
val last = Output(Bool())
val valid = Output(Bool())
val ready = Input(Bool())
}
/** AXI write-response (B) channel, with directions as seen by the master:
  * the master drives `ready`; `id`, `resp` and `valid` arrive from the slave.
  */
class B extends Bundle {
val ready = Output(Bool()) // master is able to accept a write response
val id = Input(UInt(4.W)) // 4-bit transaction ID of the response
val resp = Input(UInt(2.W)) // 2-bit write response code
val valid = Input(Bool()) // slave is presenting a valid response
}
class ICache_AXIInterface extends Bundle {
val ar = Decoupled(new AR())
val r = Flipped(Decoupled(new R()))
val ar = new AR()
val r = new R()
}
class DCache_AXIInterface extends ICache_AXIInterface {
@ -255,42 +225,42 @@ class Cache_AXIInterface extends Bundle {
// AXI read address channel
class AXI_AR extends Bundle {
val id = UInt(4.W) // transaction ID
val id = UInt(4.W) // transaction ID
val addr = UInt(32.W) // address
val len = UInt(8.W) // burst length
val size = UInt(3.W) // transfer size
val burst = UInt(2.W) // burst type
val lock = UInt(2.W) // lock type
val cache = UInt(4.W) // cache type
val prot = UInt(3.W) // protection type
val len = UInt(8.W) // burst length
val size = UInt(3.W) // transfer size
val burst = UInt(2.W) // burst type
val lock = UInt(2.W) // lock type
val cache = UInt(4.W) // cache type
val prot = UInt(3.W) // protection type
}
// AXI read data channel
class AXI_R extends Bundle {
val id = UInt(4.W) // transaction ID
val id = UInt(4.W) // transaction ID
val data = UInt(32.W) // read data
val resp = UInt(2.W) // response type
val last = Bool() // last beat of burst
val resp = UInt(2.W) // response type
val last = Bool() // last beat of burst
}
// AXI write address channel
class AXI_AW extends Bundle {
val id = UInt(4.W) // transaction ID
val id = UInt(4.W) // transaction ID
val addr = UInt(32.W) // address
val len = UInt(8.W) // burst length
val size = UInt(3.W) // transfer size
val burst = UInt(2.W) // burst type
val lock = UInt(2.W) // lock type
val cache = UInt(4.W) // cache type
val prot = UInt(3.W) // protection type
val len = UInt(8.W) // burst length
val size = UInt(3.W) // transfer size
val burst = UInt(2.W) // burst type
val lock = UInt(2.W) // lock type
val cache = UInt(4.W) // cache type
val prot = UInt(3.W) // protection type
}
// AXI write data channel
class AXI_W extends Bundle {
val id = UInt(4.W) // transaction ID
val id = UInt(4.W) // transaction ID
val data = UInt(32.W) // write data
val strb = UInt(4.W) // byte enable
val last = Bool() // last beat of burst
val strb = UInt(4.W) // byte enable
val last = Bool() // last beat of burst
}
// AXI write response channel
@ -301,10 +271,10 @@ class AXI_B extends Bundle {
// AXI interface
class AXI extends Bundle {
val ar = Decoupled(new AXI_AR()) // read address channel
val ar = Decoupled(new AXI_AR()) // read address channel
val r = Flipped(Decoupled(new AXI_R())) // read data channel
val aw = Decoupled(new AXI_AW()) // write address channel
val w = Decoupled(new AXI_W()) // write data channel
val aw = Decoupled(new AXI_AW()) // write address channel
val w = Decoupled(new AXI_W()) // write data channel
val b = Flipped(Decoupled(new AXI_B())) // write response channel
}

View File

@ -8,8 +8,8 @@ import cpu.CpuConfig
trait Constants {
val config = new CpuConfig
// 全局
val PC_WID = 32
val PC_INIT = "hbfc00000".U(PC_WID.W)
val PC_WID = 64
val PC_INIT = "h60000000".U(PC_WID.W)
val EXT_INT_WID = 6
@ -142,7 +142,8 @@ trait Constants {
val DIV_STOP = false.B
// inst rom
val INST_WID = 32
val INST_WID = 32
val INST_ADDR_WID = PC_WID
// data ram
val DATA_ADDR_WID = 32
@ -197,16 +198,16 @@ trait Constants {
// 例外类型
val EXCODE_WID = 5
val EX_NO = 0.U(EXCODE_WID.W) // 无异常
val EX_INT = 1.U(EXCODE_WID.W) // 中断异常
val EX_MOD = 2.U(EXCODE_WID.W) // TLB 条目修改异常
val EX_TLBL = 3.U(EXCODE_WID.W) // TLB 非法取指令或访问异常
val EX_TLBS = 4.U(EXCODE_WID.W) // TLB 非法存储访问异常
val EX_ADEL = 5.U(EXCODE_WID.W) // 地址未对齐异常取指令或访问异常
val EX_ADES = 6.U(EXCODE_WID.W) // 地址未对齐异常存储访问异常
val EX_SYS = 7.U(EXCODE_WID.W) // 系统调用异常
val EX_BP = 8.U(EXCODE_WID.W) // 断点异常
val EX_RI = 9.U(EXCODE_WID.W) // 保留指令异常
val EX_NO = 0.U(EXCODE_WID.W) // 无异常
val EX_INT = 1.U(EXCODE_WID.W) // 中断异常
val EX_MOD = 2.U(EXCODE_WID.W) // TLB 条目修改异常
val EX_TLBL = 3.U(EXCODE_WID.W) // TLB 非法取指令或访问异常
val EX_TLBS = 4.U(EXCODE_WID.W) // TLB 非法存储访问异常
val EX_ADEL = 5.U(EXCODE_WID.W) // 地址未对齐异常取指令或访问异常
val EX_ADES = 6.U(EXCODE_WID.W) // 地址未对齐异常存储访问异常
val EX_SYS = 7.U(EXCODE_WID.W) // 系统调用异常
val EX_BP = 8.U(EXCODE_WID.W) // 断点异常
val EX_RI = 9.U(EXCODE_WID.W) // 保留指令异常
val EX_CPU = 10.U(EXCODE_WID.W) // 协处理器不可用异常
val EX_OV = 11.U(EXCODE_WID.W) // 算术溢出异常
@ -232,6 +233,17 @@ trait Constants {
val C_WID = 3
val ASID_WID = 8
val VPN2_WID = 19
// AXI
val BURST_FIXED = 0
val BURST_INCR = 1
val BURST_WRAP = 2
val BURST_RESERVED = 3
val RESP_OKEY = 0
val RESP_EXOKEY = 1
val RESP_SLVERR = 2
val RESP_DECERR = 3
}
trait OptionConst {

View File

@ -1,70 +0,0 @@
package cpu.mmu
import chisel3._
import chisel3.util._
import cpu.defines._
// Data-side L1 TLB entry: everything an ITLB entry holds plus a `dirty` flag.
// TlbL1D only treats a store as successfully translated when `dirty` is set.
class DTLB extends ITLB {
val dirty = Bool()
}
/** First-level data TLB: a single cached translation (one [[DTLB]] register)
  * sitting between the memory-access address and the data cache.
  *
  * Addresses whose top two bits are `10` (0x8000_0000 - 0xBFFF_FFFF) bypass the
  * entry and are translated by dropping the top three address bits; bit 29
  * selects uncached access for that range. Every other address must match the
  * cached entry; on a miss the module asks the second-level TLB (`io.tlb2`)
  * and either refills the entry or raises one of the `io.tlb1` status flags.
  */
class TlbL1D extends Module {
val io = IO(new Bundle {
val cache = new Tlb_DCache()
val fence = Input(Bool())
val cpu_stall = Input(Bool())
val dcache_stall = Input(Bool())
val addr = Input(UInt(32.W))
val mem_en = Input(Bool())
val mem_write = Input(Bool())
val tlb1 = Output(new Tlb1InfoD())
val tlb2 = Flipped(new Tlb2Info())
})
// The single cached translation; reset to all zeros, i.e. invalid.
val dtlb = RegInit(0.U.asTypeOf(new DTLB()))
// Virtual page number: address bits above the 12-bit page offset.
val vpn = io.addr(31, 12)
// Top two bits == 0b10: fixed mapping that needs no TLB entry
// (presumably the MIPS kseg0/kseg1 segments - confirm against the ISA spec).
val direct_mapped = io.addr(31, 30) === 2.U(2.W)
// In the fixed-mapping range bit 29 decides cacheability; otherwise the entry does.
io.cache.uncached := Mux(direct_mapped, io.addr(29), dtlb.uncached)
// A mapped access is OK when the entry matches and is valid; a write additionally
// requires the entry's dirty bit.
io.cache.translation_ok := direct_mapped || (dtlb.vpn === vpn && dtlb.valid && (!io.mem_write || dtlb.dirty))
// Physical tag: addr(28, 12) zero-extended to 20 bits for the fixed mapping, else the cached PPN.
io.cache.tag := Mux(direct_mapped, Cat(0.U(3.W), io.addr(28, 12)), dtlb.ppn)
io.cache.pa := Cat(io.cache.tag, io.addr(11, 0))
// Entry matches the VPN and is valid, regardless of the dirty check.
io.cache.tlb1_ok := dtlb.vpn === vpn && dtlb.valid
// During a fill: the L2 lookup found an entry whose valid bit for this page
// (selected by the lowest VPN bit) is set.
io.cache.hit := io.cache.fill && io.tlb2.found && io.tlb2.entry.v(vpn(0))
// Fence invalidates the cached entry. This statement precedes the fill logic below,
// so if both fire in the same cycle the fill's `valid := true.B` is the last
// connection and wins.
when(io.fence) { dtlb.valid := false.B }
// Registered exception/status flags reported on io.tlb1.
val tlb1 = RegInit(0.U.asTypeOf(new Tlb1InfoD()))
io.tlb1 <> tlb1
// Registered lookup key (VPN without its lowest bit) presented to the L2 TLB.
val tlb2 = RegInit(0.U.asTypeOf(new Bundle { val vpn2 = UInt(19.W) }))
io.tlb2.vpn2 <> tlb2.vpn2
// Translation failed for an enabled access while `dcache_is_idle` is asserted
// and no fill is in progress.
when(io.cache.dcache_is_idle && !io.cache.fill && io.mem_en && !io.cache.translation_ok) {
when(io.cache.tlb1_ok) {
// Entry matched but translation still failed: by the expression above this
// can only be a write to an entry whose dirty bit is clear.
tlb1.modify := true.B
}.otherwise {
// No matching entry: latch the key for the L2 lookup.
tlb2.vpn2 := vpn(19, 1)
}
}.elsewhen(io.cache.fill) {
when(io.tlb2.found) {
when(io.tlb2.entry.v(vpn(0))) {
// L2 hit on a valid page: refill the cached entry from the half selected by vpn(0).
dtlb.vpn := vpn
dtlb.ppn := io.tlb2.entry.pfn(vpn(0))
dtlb.uncached := !io.tlb2.entry.c(vpn(0))
dtlb.dirty := io.tlb2.entry.d(vpn(0))
dtlb.valid := true.B
}.otherwise {
// L2 entry exists but the selected page is not valid.
tlb1.invalid := true.B
}
}.otherwise {
// Nothing in the L2 TLB matches.
tlb1.refill := true.B
}
}.elsewhen(io.cache.dcache_is_save && !io.cpu_stall && !io.dcache_stall) {
// Clear all status flags once `dcache_is_save` is asserted and neither stall is active.
tlb1.invalid := false.B
tlb1.refill := false.B
tlb1.modify := false.B
}
}

View File

@ -1,59 +0,0 @@
package cpu.mmu
import chisel3._
import chisel3.util._
import cpu.defines._
// One cached L1 TLB translation: a 20-bit virtual page number mapped to a
// 20-bit physical page number, with an uncached attribute and a valid flag.
class ITLB extends Bundle {
val vpn = UInt(20.W)
val ppn = UInt(20.W)
val uncached = Bool()
val valid = Bool()
}
/** First-level instruction TLB: a single cached translation (one [[ITLB]]
  * register) in front of the instruction cache.
  *
  * Addresses whose top two bits are `10` (0x8000_0000 - 0xBFFF_FFFF) bypass the
  * entry; all others must match it. While the cache signals `fill`, the result
  * of the second-level lookup on `io.tlb2` either refills the entry or sets a
  * status flag on `io.tlb1`.
  */
class TlbL1I extends Module {
val io = IO(new Bundle {
val addr = Input(UInt(32.W))
val fence = Input(Bool())
val cpu_stall = Input(Bool())
val icache_stall = Input(Bool())
val cache = new Tlb_ICache()
val tlb1 = Output(new Tlb1InfoI())
val tlb2 = Flipped(new Tlb2Info())
})
// The single cached translation; reset to all zeros, i.e. invalid.
val itlb = RegInit(0.U.asTypeOf(new ITLB()))
// Virtual page number: address bits above the 12-bit page offset.
val vpn = io.addr(31, 12)
// Top two bits == 0b10: fixed mapping that needs no TLB entry
// (presumably the MIPS kseg0/kseg1 segments - confirm against the ISA spec).
val direct_mapped = io.addr(31, 30) === 2.U(2.W)
// In the fixed-mapping range bit 29 decides cacheability; otherwise the entry does.
io.cache.uncached := Mux(direct_mapped, io.addr(29), itlb.uncached)
io.cache.translation_ok := direct_mapped || (itlb.vpn === vpn && itlb.valid)
// L2 lookup found an entry whose valid bit for this page (selected by vpn(0)) is set.
io.cache.hit := io.tlb2.found && io.tlb2.entry.v(vpn(0))
// Physical tag: addr(28, 12) zero-extended to 20 bits for the fixed mapping, else the cached PPN.
io.cache.tag := Mux(direct_mapped, Cat(0.U(3.W), io.addr(28, 12)), itlb.ppn)
io.cache.pa := Cat(io.cache.tag, io.addr(11, 0))
// Fence invalidates the entry, but only when neither stall is active. It precedes
// the fill logic below, so a same-cycle refill (`valid := true.B`) is the last
// connection and wins.
when(io.fence && !io.icache_stall && !io.cpu_stall) { itlb.valid := false.B }
// Registered status flags reported on io.tlb1.
val tlb1 = RegInit(0.U.asTypeOf(new Tlb1InfoI()))
tlb1 <> io.tlb1
// The L2 lookup key is driven combinationally from the current address
// (VPN without its lowest bit).
io.tlb2.vpn2 := vpn(19, 1)
when(io.cache.fill) {
when(io.tlb2.found) {
when(io.tlb2.entry.v(vpn(0))) {
// L2 hit on a valid page: refill the cached entry from the half selected by vpn(0).
itlb.vpn := vpn
itlb.ppn := io.tlb2.entry.pfn(vpn(0))
itlb.uncached := !io.tlb2.entry.c(vpn(0))
itlb.valid := true.B
}.otherwise {
// L2 entry exists but the selected page is not valid.
tlb1.invalid := true.B
}
}.otherwise {
// Nothing in the L2 TLB matches.
tlb1.refill := true.B
}
}.elsewhen(io.cache.icache_is_save && !io.cpu_stall && !io.icache_stall) {
// Clear the status flags once `icache_is_save` is asserted and neither stall is active.
tlb1.invalid := false.B
tlb1.refill := false.B
}
}

View File

@ -1,69 +0,0 @@
package cpu.pipeline.execute
import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.TlbEntry
import cpu.defines.Const._
/** Second-level TLB: a register file of `TLB_NUM` entries searched by three
  * lookup ports in parallel.
  *
  * Port 0 is keyed by `entry_hi.vpn2`, port 1 by `tlb1_vpn2` and port 2 by
  * `tlb2_vpn2`. All three ports compare against the same `entry_hi.asid`.
  * Entries can also be read by index and overwritten by index.
  */
class TlbL2 extends Module {
  val io = IO(new Bundle {
    val in = Input(new Bundle {
      val write = new Bundle {
        val en    = Bool()
        val index = UInt(log2Ceil(TLB_NUM).W)
        val entry = new TlbEntry()
      }
      val read = new Bundle {
        val index = UInt(log2Ceil(TLB_NUM).W)
      }
      val entry_hi = new Bundle {
        val vpn2 = UInt(VPN2_WID.W)
        val asid = UInt(ASID_WID.W)
      }
      val tlb1_vpn2 = UInt(VPN2_WID.W)
      val tlb2_vpn2 = UInt(VPN2_WID.W)
    })
    val out = Output(new Bundle {
      val read = new Bundle {
        val entry = new TlbEntry()
      }
      val tlb1_found      = Bool()
      val tlb2_found      = Bool()
      val tlb1_entry      = new TlbEntry()
      val tlb2_entry      = new TlbEntry()
      val tlb_found       = Bool()
      val tlb_match_index = UInt(log2Ceil(TLB_NUM).W)
    })
  })

  // Entry storage, all zeros after reset.
  val tlb_l2 = RegInit(VecInit(Seq.fill(TLB_NUM)(0.U.asTypeOf(new TlbEntry()))))

  // Per-port hit vectors, search keys and index of the first hit.
  val tlb_match       = Seq.fill(3)(Wire(Vec(TLB_NUM, Bool())))
  val tlb_find_vpn2   = Wire(Vec(3, UInt(VPN2_WID.W)))
  val tlb_match_index = Wire(Vec(3, UInt(log2Ceil(TLB_NUM).W)))

  // Search keys in port order: entry_hi, L1 port 1, L1 port 2.
  Seq(io.in.entry_hi.vpn2, io.in.tlb1_vpn2, io.in.tlb2_vpn2).zipWithIndex.foreach {
    case (key, port) => tlb_find_vpn2(port) := key
  }

  // An entry hits when its `g` bit is set or its ASID equals entry_hi.asid,
  // and its vpn2 equals the port's key. The lowest-numbered hit is reported.
  tlb_match.zipWithIndex.foreach { case (hits, port) =>
    tlb_l2.zipWithIndex.foreach { case (entry, slot) =>
      hits(slot) := (entry.g || entry.asid === io.in.entry_hi.asid) &&
        (entry.vpn2 === tlb_find_vpn2(port))
    }
    tlb_match_index(port) := PriorityEncoder(hits)
  }

  // Port 0: probe result for entry_hi.
  io.out.tlb_found       := tlb_match(0).asUInt.orR
  io.out.tlb_match_index := tlb_match_index(0)

  // Ports 1 and 2: lookups on behalf of the two L1 TLB ports.
  io.out.tlb1_found := tlb_match(1).asUInt.orR
  io.out.tlb1_entry := tlb_l2(tlb_match_index(1))
  io.out.tlb2_found := tlb_match(2).asUInt.orR
  io.out.tlb2_entry := tlb_l2(tlb_match_index(2))

  // Indexed read and write.
  io.out.read.entry := tlb_l2(io.in.read.index)
  when(io.in.write.en) {
    tlb_l2(io.in.write.index) := io.in.write.entry
  }
}

View File

@ -12,7 +12,6 @@ class InstFifoDecoderUnit(implicit val config: CpuConfig) extends Bundle {
val allow_to_go = Output(Vec(config.decoderNum, Bool()))
val inst = Input(Vec(config.decoderNum, new BufferUnit()))
val info = Input(new Bundle {
val inst0_is_in_delayslot = Bool()
val empty = Bool()
val almost_empty = Bool()
})
@ -108,8 +107,6 @@ class DecoderUnit(implicit val config: CpuConfig) extends Module {
val pc = io.instFifo.inst.map(_.pc)
val inst = io.instFifo.inst.map(_.inst)
val inst_info = decoder.map(_.io.out)
val tlb_refill = io.instFifo.inst.map(_.tlb.refill)
val tlb_invalid = io.instFifo.inst.map(_.tlb.invalid)
val interrupt = io.cp0.intterupt_allowed && (io.cp0.cause_ip & io.cp0.status_im).orR && !io.instFifo.info.empty
for (i <- 0 until (config.decoderNum)) {
@ -141,12 +138,9 @@ class DecoderUnit(implicit val config: CpuConfig) extends Module {
)
io.executeStage.inst0.ex.flush_req :=
io.executeStage.inst0.ex.excode =/= EX_NO ||
io.executeStage.inst0.ex.tlb_refill ||
io.executeStage.inst0.ex.eret
io.executeStage.inst0.ex.tlb_refill := tlb_refill(0)
io.executeStage.inst0.ex.eret := inst_info(0).op === EXE_ERET
io.executeStage.inst0.ex.badvaddr := pc(0)
io.executeStage.inst0.ex.bd := io.instFifo.info.inst0_is_in_delayslot
val inst0_ex_cpu =
!io.cp0.access_allowed && VecInit(EXE_MFC0, EXE_MTC0, EXE_TLBR, EXE_TLBWI, EXE_TLBWR, EXE_TLBP, EXE_ERET, EXE_WAIT)
.contains(inst_info(0).op)
@ -154,7 +148,6 @@ class DecoderUnit(implicit val config: CpuConfig) extends Module {
EX_NO,
Seq(
interrupt -> EX_INT,
(tlb_refill(0) || tlb_invalid(0)) -> EX_TLBL,
(pc(0)(1, 0).orR || (pc(0)(31) && !io.cp0.kernel_mode)) -> EX_ADEL,
(inst_info(0).inst_valid === INST_INVALID) -> EX_RI,
(inst_info(0).op === EXE_SYSCALL) -> EX_SYS,
@ -189,18 +182,15 @@ class DecoderUnit(implicit val config: CpuConfig) extends Module {
forwardCtrl.out.inst(1).src2.rdata,
decoder(1).io.out.imm32
)
io.executeStage.inst1.ex.flush_req := io.executeStage.inst1.ex.excode =/= EX_NO || io.executeStage.inst1.ex.tlb_refill
io.executeStage.inst1.ex.tlb_refill := tlb_refill(1)
io.executeStage.inst1.ex.flush_req := io.executeStage.inst1.ex.excode =/= EX_NO
io.executeStage.inst1.ex.eret := inst_info(1).op === EXE_ERET
io.executeStage.inst1.ex.badvaddr := pc(1)
io.executeStage.inst1.ex.bd := issue.inst1.is_in_delayslot
val inst1_ex_cpu =
!io.cp0.access_allowed && VecInit(EXE_MFC0, EXE_MTC0, EXE_TLBR, EXE_TLBWI, EXE_TLBWR, EXE_TLBP, EXE_ERET, EXE_WAIT)
.contains(inst_info(1).op)
io.executeStage.inst1.ex.excode := MuxCase(
EX_NO,
Seq(
(tlb_refill(1) || tlb_invalid(1)) -> EX_TLBL,
(pc(1)(1, 0).orR || (pc(1)(31) && !io.cp0.kernel_mode)) -> EX_ADEL,
(inst_info(1).inst_valid === INST_INVALID) -> EX_RI,
(inst_info(1).op === EXE_SYSCALL) -> EX_SYS,

View File

@ -18,7 +18,6 @@ class Issue(implicit val config: CpuConfig) extends Module {
val execute = Input(Vec(config.fuNum, new MemRead()))
// 输出
val inst1 = Output(new Bundle {
val is_in_delayslot = Bool()
val allow_to_go = Bool()
})
})
@ -49,8 +48,6 @@ class Issue(implicit val config: CpuConfig) extends Module {
inst0.op === EXE_MTC0 && inst1.op === EXE_MFC0 && inst0.cp0_addr === inst1.cp0_addr
val data_conflict = raw_reg || raw_hilo || raw_cp0 || load_stall
// 指令1是否在延迟槽中
io.inst1.is_in_delayslot := inst0.fusel === FU_BR && io.inst1.allow_to_go
// 指令1是否允许执行
io.inst1.allow_to_go := io.allow_to_go &&
!instFifo_invalid &&

View File

@ -8,7 +8,7 @@ import cpu.pipeline.decoder.Src12Read
class ExecuteUnitBranchPredictor extends Bundle {
val bpuConfig = new BranchPredictorConfig()
val pc = Output(UInt(DATA_ADDR_WID.W))
val pc = Output(UInt(PC_WID.W))
val update_pht_index = Output(UInt(bpuConfig.phtDepth.W))
val branch_inst = Output(Bool())
val branch = Output(Bool())
@ -20,8 +20,8 @@ class BranchPredictorIO(implicit config: CpuConfig) extends Bundle {
val inst = Input(UInt(INST_WID.W))
val op = Input(UInt(OP_WID.W))
val ena = Input(Bool())
val pc = Input(UInt(DATA_ADDR_WID.W))
val pc_plus4 = Input(UInt(DATA_ADDR_WID.W))
val pc = Input(UInt(PC_WID.W))
val pc_plus4 = Input(UInt(PC_WID.W))
val pht_index = Input(UInt(bpuConfig.phtDepth.W))
val rs1 = Input(UInt(REG_ADDR_WID.W))
@ -29,7 +29,7 @@ class BranchPredictorIO(implicit config: CpuConfig) extends Bundle {
val branch_inst = Output(Bool())
val pred_branch = Output(Bool())
val branch_target = Output(UInt(DATA_ADDR_WID.W))
val branch_target = Output(UInt(PC_WID.W))
val update_pht_index = Output(UInt(bpuConfig.phtDepth.W))
}

View File

@ -31,7 +31,7 @@ class FetchUnit(implicit
}
})
val pc = RegNext(io.iCache.pc_next, "h_bfc00000".U(32.W))
val pc = RegNext(io.iCache.pc_next, PC_INIT)
io.iCache.pc := pc
// when inst_valid(1) is true, inst_valid(0) must be true

View File

@ -2,29 +2,21 @@ package cpu.pipeline.fetch
import chisel3._
import chisel3.util._
import cpu.{CpuConfig, BranchPredictorConfig}
import cpu.defines.Const._
import cpu.{BranchPredictorConfig, CpuConfig}
class BufferUnit extends Bundle {
val bpuConfig = new BranchPredictorConfig()
val tlb = new Bundle {
val refill = Bool()
val invalid = Bool()
}
val inst = UInt(32.W)
val inst = UInt(INST_WID.W)
val pht_index = UInt(bpuConfig.phtDepth.W)
val pc = UInt(32.W)
val pc = UInt(PC_WID.W)
}
class InstFifo(implicit val config: CpuConfig) extends Module {
val io = IO(new Bundle {
val do_flush = Input(Bool())
val flush_delay_slot = Input(Bool())
val delay_sel_flush = Input(Bool())
val decoder_delay_flush = Input(Bool())
val execute_delay_flush = Input(Bool())
val icache_stall = Input(Bool())
val jump_branch_inst = Input(Bool()) // 译码阶段的inst0是否为跳转指令
val inst0_is_in_delayslot = Output(Bool())
val ren = Input(Vec(config.decoderNum, Bool()))
val read = Output(Vec(config.decoderNum, new BufferUnit()))
@ -50,63 +42,28 @@ class InstFifo(implicit val config: CpuConfig) extends Module {
io.empty := count === 0.U
io.almost_empty := count === 1.U
val inst0_is_in_delayslot = RegInit(false.B)
io.inst0_is_in_delayslot := inst0_is_in_delayslot
inst0_is_in_delayslot := MuxCase(
false.B,
Seq(
io.flush_delay_slot -> false.B,
!io.ren(0) -> inst0_is_in_delayslot,
(io.jump_branch_inst && !io.ren(1)) -> true.B,
),
)
val delayslot_stall = RegInit(false.B)
val delayslot_enable = RegInit(false.B)
val delayslot_line = RegInit(0.U.asTypeOf(new BufferUnit()))
when(io.do_flush && io.delay_sel_flush && !io.flush_delay_slot && io.icache_stall && (io.empty || io.almost_empty)) {
delayslot_stall := true.B
}.elsewhen(delayslot_stall && io.wen(0)) {
delayslot_stall := false.B
}
when(io.do_flush && !io.flush_delay_slot && io.delay_sel_flush) {
when(io.execute_delay_flush) {
delayslot_enable := true.B
delayslot_line := Mux(io.empty, io.write(0), buffer(deq_ptr))
}.elsewhen(io.decoder_delay_flush) {
delayslot_enable := true.B
delayslot_line := Mux(io.almost_empty, io.write(0), buffer(deq_ptr + 1.U))
}.otherwise {
delayslot_enable := false.B
}
}.elsewhen(!delayslot_stall && io.ren(0)) {
delayslot_enable := false.B
}
// * deq * //
io.read(0) := MuxCase(
buffer(deq_ptr),
Seq(
delayslot_enable -> delayslot_line,
io.empty -> 0.U.asTypeOf(new BufferUnit()),
io.almost_empty -> buffer(deq_ptr),
),
io.empty -> 0.U.asTypeOf(new BufferUnit()),
io.almost_empty -> buffer(deq_ptr)
)
)
io.read(1) := MuxCase(
buffer(deq_ptr + 1.U),
Seq(
(delayslot_enable || io.empty || io.almost_empty) -> 0.U.asTypeOf(new BufferUnit()),
),
(io.empty || io.almost_empty) -> 0.U.asTypeOf(new BufferUnit())
)
)
val deq_num = MuxCase(
0.U,
Seq(
(io.empty || delayslot_enable) -> 0.U,
io.ren(1) -> 2.U,
io.ren(0) -> 1.U,
),
(io.empty) -> 0.U,
io.ren(1) -> 2.U,
io.ren(0) -> 1.U
)
)
when(io.do_flush) {

View File

@ -1,94 +0,0 @@
package cpu.pipeline.fetch
import chisel3._
import chisel3.util._
import cpu.defines.Const._
import cpu.CpuConfig
import cpu.pipeline.fetch.BufferUnit
// One pre-decoded fetch slot: the raw instruction and its PC, the TLB status
// flags carried with it, and the pre-decode results (operation code,
// jump/branch flag, delay-slot flag) plus a valid bit.
class BufferEnq extends Bundle {
val valid = Bool()
val jump_branch_inst = Bool()
val op = UInt(OP_WID.W)
val is_in_delayslot = Bool()
val tlb = new Bundle {
val refill = Bool()
val invalid = Bool()
}
val inst = UInt(32.W)
val pc = UInt(32.W)
}
/** Pre-decode stage between the instruction cache and the instruction FIFO.
  *
  * Registers up to `config.instFetchNum` fetched instructions per cycle, then
  * combinationally classifies each registered instruction as a jump/branch
  * (with its operation code) and reports whether it sits in a delay slot.
  * The registers hold their contents while the FIFO reports full.
  */
class PreDecoder(implicit val config: CpuConfig) extends Module {
val io = IO(new Bundle {
val flush = Input(Bool())
val full = new Bundle {
val fromInstFifo = Input(Bool())
val toIcache = Output(Bool())
}
val read = Output(Vec(config.instFetchNum, new BufferEnq()))
val wen = Input(Vec(config.instFetchNum, Bool()))
val write = Input(Vec(config.instFetchNum, new BufferUnit()))
})
// One register slot per fetched instruction, all zeros after reset.
val buffer = RegInit(VecInit(Seq.fill(config.instFetchNum)(0.U.asTypeOf(new BufferEnq()))))
for (i <- 0 until config.instFetchNum) {
// Capture the payload only for slots being written while the FIFO has room.
when(io.wen(i) && !io.full.fromInstFifo) {
buffer(i).tlb.refill := io.write(i).tlb.refill
buffer(i).tlb.invalid := io.write(i).tlb.invalid
buffer(i).inst := io.write(i).inst
buffer(i).pc := io.write(i).pc
}
// The valid bit follows the write enable whenever the FIFO has room,
// so a slot that is not written becomes invalid.
when(!io.full.fromInstFifo) {
buffer(i).valid := io.wen(i)
}
}
// Back-pressure is passed straight through to the instruction cache.
io.full.toIcache := io.full.fromInstFifo
for (i <- 0 until config.instFetchNum) {
// Decode the registered instruction; anything not listed is (EXE_NOP, not a jump).
val signals: List[UInt] = ListLookup(
buffer(i).inst,
List(EXE_NOP, false.B),
Array( // jump and branch instructions
J -> List(EXE_J, true.B),
JAL -> List(EXE_JAL, true.B),
JR -> List(EXE_JR, true.B),
JALR -> List(EXE_JALR, true.B),
BEQ -> List(EXE_BEQ, true.B),
BNE -> List(EXE_BNE, true.B),
BGTZ -> List(EXE_BGTZ, true.B),
BLEZ -> List(EXE_BLEZ, true.B),
BGEZ -> List(EXE_BGEZ, true.B),
BGEZAL -> List(EXE_BGEZAL, true.B),
BLTZ -> List(EXE_BLTZ, true.B),
BLTZAL -> List(EXE_BLTZAL, true.B),
),
)
val op :: jump_branch_inst :: Nil = signals
io.read(i).tlb.refill := buffer(i).tlb.refill
io.read(i).tlb.invalid := buffer(i).tlb.invalid
io.read(i).inst := buffer(i).inst
io.read(i).pc := buffer(i).pc
io.read(i).valid := buffer(i).valid
io.read(i).jump_branch_inst := jump_branch_inst
io.read(i).op := op
}
// Delay-slot flags: slot i is in a delay slot when the previous slot is a jump;
// slot 0 uses the last slot's flag delayed by one cycle.
// NOTE(review): these read the *register field* `buffer(_).jump_branch_inst`, which
// is not assigned anywhere in this module (only tlb, inst, pc and valid are written),
// so it appears to stay at its reset value `false`. The decoded `jump_branch_inst`
// above only reaches io.read(i).jump_branch_inst - confirm which one was intended.
val inst0_is_in_delayslot = RegNext(buffer(config.instFetchNum - 1).jump_branch_inst)
for (i <- 1 until config.instFetchNum) {
io.read(i).is_in_delayslot := buffer(i - 1).jump_branch_inst
}
io.read(0).is_in_delayslot := inst0_is_in_delayslot
// Flush invalidates every slot. It is the last connection to `valid`, so it
// overrides the write-enable update above.
when(io.flush) {
for (i <- 0 until config.instFetchNum) {
buffer(i).valid := false.B
}
}
}

View File

@ -22,11 +22,6 @@ class MemoryUnit(implicit val config: CpuConfig) extends Module {
val writeBackStage = Output(new MemoryUnitWriteBackUnit())
val dataMemory = new Bundle {
val in = Input(new Bundle {
val tlb = new Bundle {
val invalid = Bool()
val refill = Bool()
val modify = Bool()
}
val rdata = UInt(DATA_WID.W)
})
val out = Output(new Bundle {
@ -66,20 +61,13 @@ class MemoryUnit(implicit val config: CpuConfig) extends Module {
io.memoryStage.inst0.rd_info.wdata,
)
io.writeBackStage.inst0.ex := io.memoryStage.inst0.ex
val inst0_access_mem =
(io.dataMemory.out.en && (io.dataMemory.in.tlb.invalid || io.dataMemory.in.tlb.refill) && io.memoryStage.inst0.inst_info.fusel === FU_MEM)
val inst0_tlbmod =
(io.dataMemory.in.tlb.modify && io.dataMemory.out.wen.orR && io.memoryStage.inst0.inst_info.fusel === FU_MEM)
io.writeBackStage.inst0.ex.excode := MuxCase(
io.memoryStage.inst0.ex.excode,
Seq(
(io.memoryStage.inst0.ex.excode =/= EX_NO) -> io.memoryStage.inst0.ex.excode,
inst0_access_mem -> Mux(io.dataMemory.out.wen.orR, EX_TLBS, EX_TLBL),
inst0_tlbmod -> EX_MOD,
),
)
io.writeBackStage.inst0.ex.tlb_refill := io.memoryStage.inst0.ex.tlb_refill && io.memoryStage.inst0.ex.excode === EX_TLBL || io.dataMemory.in.tlb.refill && io.memoryStage.inst0.inst_info.fusel === FU_MEM
io.writeBackStage.inst0.ex.flush_req := io.memoryStage.inst0.ex.flush_req || io.writeBackStage.inst0.ex.excode =/= EX_NO || io.writeBackStage.inst0.ex.tlb_refill
io.writeBackStage.inst0.ex.flush_req := io.memoryStage.inst0.ex.flush_req || io.writeBackStage.inst0.ex.excode =/= EX_NO
io.writeBackStage.inst0.cp0 := io.memoryStage.inst0.cp0
io.writeBackStage.inst1.pc := io.memoryStage.inst1.pc
@ -90,20 +78,13 @@ class MemoryUnit(implicit val config: CpuConfig) extends Module {
io.memoryStage.inst1.rd_info.wdata,
)
io.writeBackStage.inst1.ex := io.memoryStage.inst1.ex
val inst1_access_mem =
(io.dataMemory.out.en && (io.dataMemory.in.tlb.invalid || io.dataMemory.in.tlb.refill) && io.memoryStage.inst1.inst_info.fusel === FU_MEM)
val inst1_tlbmod =
(io.dataMemory.in.tlb.modify && io.dataMemory.out.wen.orR && io.memoryStage.inst1.inst_info.fusel === FU_MEM)
io.writeBackStage.inst1.ex.excode := MuxCase(
io.memoryStage.inst1.ex.excode,
Seq(
(io.memoryStage.inst1.ex.excode =/= EX_NO) -> io.memoryStage.inst1.ex.excode,
inst1_access_mem -> Mux(io.dataMemory.out.wen.orR, EX_TLBS, EX_TLBL),
inst1_tlbmod -> EX_MOD,
),
)
io.writeBackStage.inst1.ex.tlb_refill := io.memoryStage.inst1.ex.tlb_refill && io.memoryStage.inst1.ex.excode === EX_TLBL || io.dataMemory.in.tlb.refill && io.memoryStage.inst1.inst_info.fusel === FU_MEM
io.writeBackStage.inst1.ex.flush_req := io.memoryStage.inst1.ex.flush_req || io.writeBackStage.inst1.ex.excode =/= EX_NO || io.writeBackStage.inst1.ex.tlb_refill
io.writeBackStage.inst1.ex.flush_req := io.memoryStage.inst1.ex.flush_req || io.writeBackStage.inst1.ex.excode =/= EX_NO
io.cp0.in.inst(0).pc := io.writeBackStage.inst0.pc
io.cp0.in.inst(0).ex := io.writeBackStage.inst0.ex