删除不必要的文件

This commit is contained in:
Liphen 2024-03-22 14:23:12 +08:00
parent 7e13a02cb4
commit a69e4e907d
23 changed files with 0 additions and 3233 deletions

View File

@ -1,119 +0,0 @@
package cpu.axi
import chisel3._
import chisel3.util._
/** Synchronous FIFO that wraps `chisel3.util.Queue` behind a plain
  * write-enable / read-enable interface.
  *
  * @param dataWidth
  *   Bit width of each stored element.
  * @param buffDepth
  *   Number of entries of the underlying queue.
  * @param addrWidth
  *   Kept for interface compatibility; it is not referenced by this implementation.
  */
class FifoBuffer(
  val dataWidth: Int = 32,
  val buffDepth: Int = 4,
  val addrWidth: Int = 2,
) extends Module {
  val io = IO(new Bundle {
    val wen    = Input(Bool())             // drives the queue's enqueue-valid
    val ren    = Input(Bool())             // drives the queue's dequeue-ready
    val input  = Input(UInt(dataWidth.W))  // element offered for enqueue
    val output = Output(UInt(dataWidth.W)) // element at the head of the queue
    val empty  = Output(Bool())            // high while the queue holds no element
    val full   = Output(Bool())            // high while the queue cannot accept an element
  })

  val queue = Module(new Queue(UInt(dataWidth.W), buffDepth))

  // Enqueue side.
  queue.io.enq.bits  := io.input
  queue.io.enq.valid := io.wen

  // Dequeue side.
  io.output          := queue.io.deq.bits
  queue.io.deq.ready := io.ren

  // Status flags: full mirrors the inverted enqueue-ready, empty is "count is all zeros".
  io.empty := !queue.io.count.orR
  io.full  := !queue.io.enq.ready
}
/** Occupancy counter for a FIFO: counts up on an accepted write and down on an
  * accepted read, and derives `empty` / `full` from the count.
  *
  * @param buffDepth
  *   Capacity being tracked; `full` is asserted when the count equals this value.
  * @param addrWidth
  *   Requested minimum width of the counter register. The register is widened
  *   automatically when this is too small to represent `buffDepth` itself.
  */
class FifoCount(
  val buffDepth: Int = 4,
  val addrWidth: Int = 2,
) extends Module {
  val io = IO(new Bundle {
    val wen   = Input(Bool())  // write request
    val ren   = Input(Bool())  // read request
    val empty = Output(Bool()) // count == 0
    val full  = Output(Bool()) // count == buffDepth
  })

  // The counter has to hold every value in 0..buffDepth inclusive. With the
  // defaults (buffDepth = 4, addrWidth = 2) a 2-bit register can only reach 3,
  // so `full` could never assert and the count wrapped back to 0 (reporting
  // `empty` for a full buffer). Use at least log2Ceil(buffDepth + 1) bits.
  private val countWidth = math.max(addrWidth, log2Ceil(buffDepth + 1))

  val count = RegInit(0.U(countWidth.W))
  io.empty := count === 0.U
  io.full  := count === buffDepth.U

  // A read takes priority over a write when both are requested.
  // NOTE(review): a simultaneous accepted read and write therefore decrements
  // the count instead of leaving it unchanged - confirm this matches the FIFO
  // this counter is paired with.
  when(io.ren && !io.empty) {
    count := count - 1.U
  }.elsewhen(io.wen && !io.full) {
    count := count + 1.U
  }
}
/** Queue-backed FIFO that additionally reports whether the entry currently at
  * its head matches an externally supplied value.
  *
  * @param dataWidth
  *   Bit width of each stored element.
  * @param buffDepth
  *   Number of entries of the underlying queue.
  * @param addrWidth
  *   Kept for interface compatibility; it is not referenced by this implementation.
  * @param relatedDataWidth
  *   Number of low-order bits of the head element compared against `related_data_1`.
  */
class FifoBufferValid(
  val dataWidth:        Int = 33,
  val buffDepth:        Int = 6,
  val addrWidth:        Int = 3,
  val relatedDataWidth: Int = 32,
) extends Module {
  val io = IO(new Bundle {
    val wen            = Input(Bool())                   // drives the queue's enqueue-valid
    val ren            = Input(Bool())                   // drives the queue's dequeue-ready
    val empty          = Output(Bool())                  // queue count is zero
    val full           = Output(Bool())                  // queue count equals buffDepth
    val related_1      = Output(Bool())                  // head entry is valid and matches related_data_1
    val input          = Input(UInt(dataWidth.W))        // element offered for enqueue
    val output         = Output(UInt(dataWidth.W))       // element at the head of the queue
    val related_data_1 = Input(UInt(relatedDataWidth.W)) // value compared with the head's low bits
  })

  val queue = Module(new Queue(UInt(dataWidth.W), buffDepth))

  // Enqueue side.
  queue.io.enq.bits  := io.input
  queue.io.enq.valid := io.wen

  // Dequeue side: read enable acts as the consumer's ready.
  queue.io.deq.ready := io.ren
  io.output          := queue.io.deq.bits

  // Occupancy flags come straight from the queue's element count.
  io.empty := queue.io.count === 0.U
  io.full  := queue.io.count === buffDepth.U

  // Match only when a head entry exists and its low relatedDataWidth bits equal the probe value.
  io.related_1 := queue.io.deq.valid &&
    (io.related_data_1 === queue.io.deq.bits(relatedDataWidth - 1, 0))
}

View File

@ -1,30 +0,0 @@
package cache
import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
import cpu.CpuConfig
import cpu.CacheConfig
/** Top-level cache wrapper: instantiates the instruction cache, the data cache
  * and the shared AXI bridge, and wires them to the CPU-side and AXI-side ports.
  */
class Cache(implicit cpuConfig: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val inst = Flipped(new Cache_ICache()) // CPU <-> instruction cache
    val data = Flipped(new Cache_DCache()) // CPU <-> data cache
    val axi  = new AXI()                   // single AXI master port towards memory
  })

  // One configuration per cache flavour; both are handed to the caches explicitly.
  implicit val iCacheConfig = CacheConfig(cacheType = "icache")
  implicit val dCacheConfig = CacheConfig(cacheType = "dcache")

  val icache        = Module(new ICache(iCacheConfig))
  val dcache        = Module(new DCache(dCacheConfig))
  val axi_interface = Module(new CacheAXIInterface())

  // CPU-facing side.
  io.inst <> icache.io.cpu
  io.data <> dcache.io.cpu

  // Memory-facing side: both caches go through the bridge to the one AXI port.
  icache.io.axi <> axi_interface.io.icache
  dcache.io.axi <> axi_interface.io.dcache
  io.axi        <> axi_interface.io.axi
}

View File

@ -1,85 +0,0 @@
package cache
import chisel3._
import chisel3.util._
import cpu.defines._
/** Merges the instruction-cache and data-cache AXI ports onto one AXI master.
  *
  *  - AW / W / B: forwarded from/to the data cache only.
  *  - AR: multiplexed between the two caches; bit 0 of the issued ID records the
  *    requester (0 = icache, 1 = dcache).
  *  - R: routed back according to bit 0 of the returned ID.
  */
class CacheAXIInterface extends Module {
  val io = IO(new Bundle {
    val icache = Flipped(new ICache_AXIInterface())
    val dcache = Flipped(new DCache_AXIInterface())
    val axi    = new AXI()
  })

  // ---------------- AW: pass-through from dcache ----------------
  io.axi.aw.valid      := io.dcache.aw.valid
  io.dcache.aw.ready   := io.axi.aw.ready
  io.axi.aw.bits.addr  := io.dcache.aw.bits.addr
  io.axi.aw.bits.len   := io.dcache.aw.bits.len
  io.axi.aw.bits.size  := io.dcache.aw.bits.size
  // Fields the dcache does not drive are tied to constants.
  io.axi.aw.bits.id    := 1.U
  io.axi.aw.bits.burst := 1.U
  io.axi.aw.bits.prot  := 0.U
  io.axi.aw.bits.cache := 0.U
  io.axi.aw.bits.lock  := 0.U

  // ---------------- W: pass-through from dcache ----------------
  io.axi.w.valid     := io.dcache.w.valid
  io.dcache.w.ready  := io.axi.w.ready
  io.axi.w.bits.id   := 1.U
  io.axi.w.bits.data := io.dcache.w.bits.data
  io.axi.w.bits.strb := io.dcache.w.bits.strb
  io.axi.w.bits.last := io.dcache.w.bits.last

  // ---------------- B: pass-through to dcache ----------------
  io.dcache.b.valid     := io.axi.b.valid
  io.axi.b.ready        := io.dcache.b.ready
  io.dcache.b.bits.id   := io.axi.b.bits.id
  io.dcache.b.bits.resp := io.axi.b.bits.resp

  // ---------------- AR: arbitrate between icache and dcache ----------------
  // Once a request is presented but not yet accepted, the selection is latched
  // so the address-channel signals stay stable until the handshake completes.
  val ar_sel_lock = RegInit(false.B)
  val ar_sel_val  = RegInit(false.B)
  // Unlocked: the dcache is chosen only when the icache is not requesting.
  val choose_dcache = Mux(ar_sel_lock, ar_sel_val, !io.icache.ar.valid && io.dcache.ar.valid)

  when(io.axi.ar.valid && io.axi.ar.ready) {
    // Handshake done: release the lock.
    ar_sel_lock := false.B
  }.elsewhen(io.axi.ar.valid) {
    // Request pending: freeze the current choice.
    ar_sel_lock := true.B
    ar_sel_val  := choose_dcache
  }

  // Picks the dcache-side or icache-side signal according to the arbitration result.
  def arSelect[T <: Data](fromDcache: T, fromIcache: T): T =
    Mux(choose_dcache, fromDcache, fromIcache)

  io.axi.ar.valid      := arSelect(io.dcache.ar.valid, io.icache.ar.valid)
  io.axi.ar.bits.addr  := arSelect(io.dcache.ar.bits.addr, io.icache.ar.bits.addr)
  io.axi.ar.bits.len   := arSelect(io.dcache.ar.bits.len, io.icache.ar.bits.len)
  io.axi.ar.bits.size  := arSelect(io.dcache.ar.bits.size, io.icache.ar.bits.size)
  io.axi.ar.bits.id    := Cat(0.U(3.W), choose_dcache)
  io.axi.ar.bits.burst := 1.U
  io.axi.ar.bits.prot  := 0.U
  io.axi.ar.bits.cache := 0.U
  io.axi.ar.bits.lock  := 0.U

  // Only the selected requester sees the ready.
  io.dcache.ar.ready := choose_dcache && io.axi.ar.ready
  io.icache.ar.ready := !choose_dcache && io.axi.ar.ready

  // ---------------- R: route by bit 0 of the returned ID ----------------
  val r_sel = io.axi.r.bits.id(0)

  io.icache.r.valid     := !r_sel && io.axi.r.valid
  io.icache.r.bits.id   := io.axi.r.bits.id
  io.icache.r.bits.data := io.axi.r.bits.data
  io.icache.r.bits.resp := io.axi.r.bits.resp
  io.icache.r.bits.last := io.axi.r.bits.last

  io.dcache.r.valid     := r_sel && io.axi.r.valid
  io.dcache.r.bits.id   := io.axi.r.bits.id
  io.dcache.r.bits.data := io.axi.r.bits.data
  io.dcache.r.bits.resp := io.axi.r.bits.resp
  io.dcache.r.bits.last := io.axi.r.bits.last

  io.axi.r.ready := Mux(r_sel, io.dcache.r.ready, io.icache.r.ready)
}

View File

@ -1,783 +0,0 @@
package cache
import chisel3._
import chisel3.util._
import memory._
import cpu.CacheConfig
import cpu.defines._
import cpu.CpuConfig
import cpu.defines.Const._
import icache.mmu.AccessType
/*
整个宽度为PADDR_WID的地址
==========================================================
| tag | index | offset |
| | | bank index | bank offset |
==========================================================
nway nindex
==============================================================
| valid | dirty | tag | bank 0 | bank 1 | ... | bank n |
| 1 | 1 | | | | | |
==============================================================
| bank |
| data 0 | data 1 | ... | data n |
| XLEN | XLEN | ... | XLEN |
=====================================
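The implementation in this CPU is as follows:
Each bank is divided into several dataBlocks, each AXI_DATA_WID bits wide, which makes it easy to exchange data with the AXI bus.
In the RV64 implementation AXI_DATA_WID is 64, so each dataBlock stores exactly one data word.
To simplify the design, *a bank currently contains only one dataBlock*, i.e. each bank stores a single data word,
so the dataBlocks level can be dropped and the bank used directly in its place.
//TODO: handle the case where AXI_DATA_WID is smaller than XLEN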
==============================================================
| valid | dirty | tag | bank 0 | bank 1 | ... | bank n |
| 1 | 1 | | | | | |
==============================================================
| bank |
| dataBlocks |
| data 0 |
| 64 |
===================
*/
/** One entry of the DCache write buffer used for uncached stores.
  * The fields are copied into the AXI AW/W channel registers when the entry is dequeued.
  */
class WriteBufferUnit extends Bundle {
// Store data.
val data = UInt(XLEN.W)
// Target address of the store.
val addr = UInt(XLEN.W)
// Byte-enable strobe forwarded on the AXI W channel.
val strb = UInt(AXI_STRB_WID.W)
// Transfer size forwarded on the AXI AW channel.
val size = UInt(AXI_SIZE_WID.W)
}
/** Data cache with an integrated page-table walker (PTW).
  *
  * Two state machines live in this module:
  *  - `state`     : the cache itself (idle / uncached access / fence write-back /
  *                  line replacement / wait-for-pipeline / TLB refill).
  *  - `ptw_state` : the page-table walker that refills the TLB. On a miss in the
  *                  cacheable range it reuses the cache's `s_replace` state to
  *                  fetch the line that contains the PTE.
  *
  * Uncached stores are queued in `writeFifo` and drained over AXI AW/W/B.
  * The replacement policy is a single LRU bit per index, so only two ways are
  * supported (see the TODO on `lru`).
  *
  * @param cacheConfig geometry of the cache (ways, indices, banks, field widths)
  * @param cpuConfig   CPU-wide configuration
  */
class DCache(cacheConfig: CacheConfig)(implicit cpuConfig: CpuConfig) extends Module with HasTlbConst with HasCSRConst {
  val nway            = cacheConfig.nway
  val nindex          = cacheConfig.nindex
  val nbank           = cacheConfig.nbank
  val instFetchNum    = cpuConfig.instFetchNum
  val bankOffsetWidth = cacheConfig.bankOffsetWidth
  val bankIndexWidth  = cacheConfig.offsetWidth - bankOffsetWidth
  val bytesPerBank    = cacheConfig.bytesPerBank
  val tagWidth        = cacheConfig.tagWidth
  val indexWidth      = cacheConfig.indexWidth
  val offsetWidth     = cacheConfig.offsetWidth
  val bitsPerBank     = cacheConfig.bitsPerBank
  val writeFifoDepth  = 4

  // Each bank stores AXI_DATA_WID bits of data.
  // TODO: the current implementation is only correct when AXI_DATA_WID equals XLEN.
  require(AXI_DATA_WID == XLEN, "AXI_DATA_WID should be equal to XLEN")

  // Physical address split into tag / index / offset.
  def pAddr = new Bundle {
    val tag    = UInt(ppnLen.W)
    val index  = UInt(indexWidth.W)
    val offset = UInt(offsetWidth.W)
  }

  // The line offset split into bank index / offset inside the bank.
  def bankAddr = new Bundle {
    val index  = UInt(bankIndexWidth.W)
    val offset = UInt(bankOffsetWidth.W)
  }

  val io = IO(new Bundle {
    val cpu = Flipped(new Cache_DCache())
    val axi = new DCache_AXIInterface()
  })

  // State machine of the dcache.
  val s_idle :: s_uncached :: s_fence :: s_replace :: s_wait :: s_tlb_refill :: Nil = Enum(6)
  val state                                                                         = RegInit(s_idle)

  // State machine of the page-table walker.
  val ptw_handshake :: ptw_send :: ptw_cached :: ptw_uncached :: ptw_check :: ptw_set :: Nil = Enum(6)
  val ptw_state                                                                              = RegInit(ptw_handshake)

  // The walker is "working" while it is between the handshake and the final
  // set step and has not reported a fault.
  val ptw_working =
    ptw_state =/= ptw_handshake &&
      ptw_state =/= ptw_set &&
      !(io.cpu.tlb.ptw.pte.bits.access_fault || io.cpu.tlb.ptw.pte.bits.page_fault)

  // Scratch registers shared between the walker and the cache state machine.
  val ptw_scratch = RegInit(0.U.asTypeOf(new Bundle {
    val paddr       = pAddr
    val replace     = Bool()
    val dcache_wait = Bool()
  }))

  io.cpu.tlb.ptw.vpn.ready := false.B

  // ==========================================================
  // |        ppn         |         page offset               |
  // ----------------------------------------------------------
  // |        tag         |  index |         offset           |
  // |                    |        | bank index | bank offset |
  // ==========================================================

  // Index taken from the EXE-stage address; selects the cache line to read.
  val exe_index = io.cpu.exe_addr(indexWidth + offsetWidth - 1, offsetWidth)
  // Bank index taken from the MEM-stage address; selects the bank inside the line.
  val bank_index = io.cpu.addr(bankIndexWidth + bankOffsetWidth - 1, bankOffsetWidth)

  // // A bank row stores a single data word, so bank_offset is always 0.
  // val bank_offset =
  //   if (bankOffsetWidth > log2Ceil(XLEN / 8))
  //     io.cpu.addr(bankOffsetWidth - 1, log2Ceil(XLEN / 8)) // keep the address aligned
  //   else
  //     0.U

  // AXI `size` / `len` used for cached (whole-line) transfers.
  val cached_size = log2Ceil(AXI_DATA_WID / 8)
  val cached_len  = (nbank - 1)

  // * valid dirty * //
  // Every line has one valid bit and one dirty bit.
  val valid = RegInit(VecInit(Seq.fill(nindex)(VecInit(Seq.fill(nway)(false.B))))) // FIXME: putting nway as the outer dimension causes a stack-overflow error
  val dirty = RegInit(VecInit(Seq.fill(nindex)(VecInit(Seq.fill(nway)(false.B)))))
  val lru   = RegInit(VecInit(Seq.fill(nindex)(false.B))) // TODO: support more ways; only 2 ways are supported for now

  // Index of the first line that has a dirty way.
  val dirty_index = Wire(UInt(indexWidth.W))
  dirty_index := PriorityEncoder(dirty.map(_.asUInt.orR))
  // Which way of that line is dirty (reads the dirty bit of way 1).
  val dirty_way = dirty(dirty_index)(1)

  // Set while a fence write-back burst is in flight.
  val fence = RegInit(false.B)

  // Reading the bank SRAMs takes two cycles; this flag marks the extra cycle.
  val readsram = RegInit(false.B)

  // Uncached stores are written back through writeFifo.
  val writeFifo          = Module(new Queue(new WriteBufferUnit(), writeFifoDepth))
  val writeFifo_axi_busy = RegInit(false.B)
  val writeFifo_busy     = writeFifo.io.deq.valid // || writeFifo_axi_busy -- this extra check should not be needed

  writeFifo.io.enq.valid := false.B
  writeFifo.io.enq.bits  := 0.U.asTypeOf(new WriteBufferUnit())
  writeFifo.io.deq.ready := false.B

  // * victim cache * //
  val burst = RegInit(0.U.asTypeOf(new Bundle {
    val wstrb = Vec(nway, UInt(nbank.W)) // selects which bank the refill data is written to
  }))

  // Copy of the evicted line, used to avoid read/write timing conflicts when a
  // write-back happens during replace.
  val bank_wbindex = RegInit(0.U((offsetWidth - log2Ceil(XLEN / 8)).W))
  val bank_wbdata  = RegInit(VecInit(Seq.fill(nbank)(0.U(XLEN.W))))

  // Whether the EXE-stage address is used to access the RAMs one stage early.
  val use_next_addr = (state === s_idle) || (state === s_wait)
  val do_replace    = RegInit(false.B)
  // replace_index is the index of the line being accessed / replaced.
  val replace_index = Wire(UInt(indexWidth.W))
  replace_index := io.cpu.addr(indexWidth + offsetWidth - 1, offsetWidth)
  val replace_wstrb = Wire(Vec(nbank, Vec(nway, UInt(AXI_STRB_WID.W))))
  val replace_wdata = Mux(state === s_replace, io.axi.r.bits.data, io.cpu.wdata)

  val replace_way   = lru(replace_index)
  val replace_dirty = dirty(replace_index)(replace_way)

  val tag_rindex = Mux(use_next_addr, exe_index, replace_index)
  val tag_wstrb  = RegInit(VecInit(Seq.fill(nway)(false.B)))
  val tag_wdata  = RegInit(0.U(tagWidth.W))

  val data = Wire(Vec(nbank, Vec(nway, UInt(XLEN.W))))
  // The tag must be registered, otherwise it can fail to hit while in idle.
  val tag = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W))))

  val tag_compare_valid = Wire(Vec(nway, Bool()))
  val cache_hit         = tag_compare_valid.contains(true.B)

  val mmio_read_stall  = io.cpu.tlb.uncached && !io.cpu.wen.orR
  val mmio_write_stall = io.cpu.tlb.uncached && io.cpu.wen.orR && !writeFifo.io.enq.ready
  val cached_stall     = !io.cpu.tlb.uncached && !cache_hit

  val select_way = tag_compare_valid(1)

  val dcache_stall = Mux(
    state === s_idle,
    Mux(
      io.cpu.en,
      (cached_stall || mmio_read_stall || mmio_write_stall || !io.cpu.tlb.hit),
      io.cpu.fence_i || fence
    ),
    state =/= s_wait
  )
  io.cpu.dcache_ready := !dcache_stall

  val saved_rdata = RegInit(0.U(XLEN.W))

  io.cpu.rdata := Mux(state === s_wait, saved_rdata, data(bank_index)(select_way))

  io.cpu.tlb.vaddr       := io.cpu.addr
  io.cpu.tlb.access_type := Mux(io.cpu.en && io.cpu.wen.orR, AccessType.store, AccessType.load)
  io.cpu.tlb.en          := io.cpu.en

  val bank_raddr = Wire(UInt(indexWidth.W))
  bank_raddr := Mux(state === s_fence, dirty_index, Mux(use_next_addr, exe_index, replace_index))
  val tag_raddr = Mux(state === s_fence, dirty_index, tag_rindex)

  // Per-bank, per-way byte strobes for a CPU store hit. The outer dimension is
  // indexed by bank (`bank_index` and the loop variable `j < nbank`), so it is
  // sized by `nbank`; sizing it by `nindex` breaks elaboration when nbank > nindex.
  val wstrb = Wire(Vec(nbank, (Vec(nway, UInt(AXI_STRB_WID.W)))))
  wstrb                         := 0.U.asTypeOf(wstrb)
  wstrb(bank_index)(select_way) := io.cpu.wstrb

  // bank / tag RAMs
  val tagRam = Seq.fill(nway)(Module(new LUTRam(nindex, tagWidth)))
  for { i <- 0 until nway } {
    val bank = Seq.fill(nbank)(Module(new SimpleDualPortRam(nindex, AXI_DATA_WID, byteAddressable = true)))

    // Tag RAM and hit detection are per way (independent of the bank), so they
    // are wired once per way.
    tagRam(i).io.raddr := tag_raddr
    tag(i)             := tagRam(i).io.rdata

    tagRam(i).io.wen   := tag_wstrb(i)
    tagRam(i).io.waddr := replace_index
    tagRam(i).io.wdata := tag_wdata

    tag_compare_valid(i) :=
      tag(i) === io.cpu.tlb.ptag && // tag matches
        valid(replace_index)(i) &&  // the line's valid bit is set
        io.cpu.tlb.hit              // the translation is valid

    for { j <- 0 until nbank } {
      bank(j).io.ren   := true.B
      bank(j).io.raddr := bank_raddr
      data(j)(i)       := bank(j).io.rdata

      bank(j).io.wen   := replace_wstrb(j)(i).orR
      bank(j).io.waddr := replace_index
      bank(j).io.wdata := replace_wdata
      bank(j).io.wstrb := replace_wstrb(j)(i)

      // A store hit in idle uses the CPU strobes; otherwise the refill burst strobes are used.
      replace_wstrb(j)(i) := Mux(
        tag_compare_valid(i) && io.cpu.en && io.cpu.wen.orR && !io.cpu.tlb.uncached && state === s_idle,
        wstrb(j)(i),
        Fill(AXI_STRB_WID, burst.wstrb(i)(j))
      )
    }
  }

  // Registered AXI master channels.
  val ar      = RegInit(0.U.asTypeOf(new AR()))
  val arvalid = RegInit(false.B)
  io.axi.ar.bits <> ar
  io.axi.ar.valid := arvalid

  val rready = RegInit(false.B)
  io.axi.r.ready := rready

  val aw      = RegInit(0.U.asTypeOf(new AW()))
  val awvalid = RegInit(false.B)
  io.axi.aw.bits <> aw
  io.axi.aw.valid := awvalid

  val w      = RegInit(0.U.asTypeOf(new W()))
  val wvalid = RegInit(false.B)
  io.axi.w.bits <> w
  io.axi.w.bits.last := w.last && wvalid
  io.axi.w.valid     := wvalid

  io.axi.b.ready := true.B

  val access_fault = RegInit(false.B)
  val page_fault   = RegInit(false.B)
  // In Sv39, bits 63..39 of the virtual address must all equal bit 38.
  val addr_err = io.cpu
    .addr(XLEN - 1, VADDR_WID)
    .asBools
    .map(_ =/= io.cpu.addr(VADDR_WID - 1))
    .reduce(_ || _)

  io.cpu.access_fault := access_fault
  io.cpu.page_fault   := page_fault

  // write buffer: drain one queued uncached store at a time over AW/W/B
  when(writeFifo_axi_busy) {
    when(io.axi.aw.fire) {
      awvalid := false.B
    }
    when(io.axi.w.fire) {
      wvalid := false.B
      w.last := false.B
    }
    when(io.axi.b.fire) {
      writeFifo_axi_busy := false.B
    }
  }.elsewhen(writeFifo.io.deq.valid) {
    writeFifo.io.deq.ready := writeFifo.io.deq.valid
    when(writeFifo.io.deq.fire) {
      aw.addr := writeFifo.io.deq.bits.addr
      aw.size := writeFifo.io.deq.bits.size
      w.data  := writeFifo.io.deq.bits.data
      w.strb  := writeFifo.io.deq.bits.strb
    }
    aw.len             := 0.U
    awvalid            := true.B
    w.last             := true.B
    wvalid             := true.B
    writeFifo_axi_busy := true.B
  }

  switch(state) {
    is(s_idle) {
      access_fault := false.B // clear access_fault while idle
      page_fault   := false.B // clear page_fault while idle
      when(io.cpu.en) {
        when(addr_err) {
          access_fault := true.B
        }.elsewhen(!io.cpu.tlb.hit) {
          state := s_tlb_refill
        }.elsewhen(io.cpu.tlb.uncached) {
          when(io.cpu.wen.orR) {
            when(writeFifo.io.enq.ready) {
              writeFifo.io.enq.valid     := true.B
              writeFifo.io.enq.bits.addr := io.cpu.tlb.paddr
              writeFifo.io.enq.bits.size := io.cpu.rlen
              writeFifo.io.enq.bits.strb := io.cpu.wstrb
              writeFifo.io.enq.bits.data := io.cpu.wdata

              when(!io.cpu.complete_single_request) {
                state := s_wait
              }
            }
          }.elsewhen(!writeFifo_busy) {
            ar.addr := io.cpu.tlb.paddr
            ar.len  := 0.U
            ar.size := io.cpu.rlen
            arvalid := true.B
            state   := s_uncached
            rready  := true.B
          } // when store buffer busy, read will stop at s_idle but stall pipeline.
        }.otherwise {
          when(!cache_hit) {
            state := s_replace
          }.otherwise {
            when(!dcache_stall) {
              // update lru and mark dirty
              replace_way := ~select_way
              when(io.cpu.wen.orR) {
                dirty(replace_index)(select_way) := true.B
              }
              when(!io.cpu.complete_single_request) {
                saved_rdata := data(bank_index)(select_way)
                state       := s_wait
              }
            }
          }
        }
      }.otherwise {
        io.cpu.tlb.ptw.vpn.ready := !ptw_working
        when(io.cpu.fence_i) {
          // fence.i has to write back every line whose dirty bit is set
          when(dirty.asUInt.orR) {
            when(!writeFifo_busy) {
              state    := s_fence
              readsram := false.B // reading the bank takes two cycles
            }
          }.otherwise {
            // when no dirty bit is set, fence.i can complete immediately
            state := s_wait
          }
        }
      }
    }
    is(s_uncached) {
      when(arvalid && io.axi.ar.ready) {
        arvalid := false.B
      }
      when(io.axi.r.fire) {
        rready       := false.B
        saved_rdata  := io.axi.r.bits.data
        access_fault := io.axi.r.bits.resp =/= RESP_OKEY.U
        state        := s_wait
      }
    }
    is(s_fence) {
      when(fence) {
        when(io.axi.aw.fire) {
          awvalid := false.B
        }
        when(io.axi.w.fire) {
          when(w.last) {
            wvalid := false.B
          }.otherwise {
            bank_wbindex := bank_wbindex + 1.U
            w.data       := data(bank_wbindex + 1.U)(dirty_way)
            when(bank_wbindex + 1.U === (cached_len).U) {
              w.last := true.B
            }
          }
        }
        when(io.axi.b.valid) {
          // TODO: add acc_err error handling here
          // acc_err := io.axi.b.bits.resp =/= RESP_OKEY.U
          dirty(dirty_index)(dirty_way) := false.B // write-back finished, clear the dirty bit
          fence                         := false.B
        }
      }.elsewhen(dirty.asUInt.orR) {
        readsram := true.B
        when(readsram) {
          // for axi write
          readsram := false.B
          aw.addr := Cat(
            Mux(dirty_way === 0.U, tagRam(0).io.rdata, tagRam(1).io.rdata),
            dirty_index,
            0.U(offsetWidth.W)
          )
          aw.len       := cached_len.U
          aw.size      := cached_size.U
          awvalid      := true.B
          w.data       := data(0)(dirty_way) // start the write-back from bank 0
          w.strb       := ~0.U(AXI_STRB_WID.W)
          w.last       := false.B
          wvalid       := true.B
          bank_wbindex := 0.U
          fence        := true.B
        }
      }.otherwise {
        state := s_wait
      }
    }
    is(s_replace) {
      // avoid colliding with the write queue
      when(!writeFifo_busy) {
        when(do_replace) {
          when(replace_dirty) {
            when(io.axi.aw.fire) {
              awvalid := false.B
            }
            when(io.axi.w.fire) {
              when(w.last) {
                wvalid := false.B
              }.otherwise {
                bank_wbindex := bank_wbindex + 1.U
                w.data       := bank_wbdata(bank_wbindex + 1.U)
                when(bank_wbindex + 1.U === (cached_len).U) {
                  w.last := true.B
                }
              }
            }
            when(io.axi.b.valid) {
              // TODO: add acc_err error handling here
              // acc_err := io.axi.b.bits.resp =/= RESP_OKEY.U
              replace_dirty := false.B // write-back finished, clear the dirty bit
            }
          } // everything above is the write-back part

          when(io.axi.ar.fire) {
            tag_wstrb(replace_way) := false.B
            arvalid                := false.B
          }
          when(io.axi.r.fire) {
            when(io.axi.r.bits.last) {
              rready                   := false.B
              burst.wstrb(replace_way) := 0.U
            }.otherwise {
              burst.wstrb(replace_way) := burst.wstrb(replace_way) << 1
            }
          }
          when(
            (!replace_dirty || io.axi.b.valid) && // no write-back needed, or write-back finished
              ((io.axi.r.valid && io.axi.r.bits.last) || !rready) // refill read finished
          ) {
            valid(replace_index)(replace_way) := true.B
            do_replace                        := false.B
            ptw_scratch.replace               := false.B
            when(ptw_working && io.cpu.tlb.ptw.access_type =/= AccessType.fetch) {
              // replace was borrowed by the ptw
              state := s_tlb_refill
            }.otherwise {
              when(ptw_scratch.dcache_wait && !io.cpu.complete_single_request) {
                state := s_wait
              }.otherwise {
                ptw_scratch.dcache_wait := false.B
                state                   := s_idle
              }
            }
          }
        }.otherwise {
          // one extra cycle so the SRAM read data is available
          readsram := true.B
          when(readsram) {
            readsram                 := false.B
            do_replace               := true.B
            ar.len                   := cached_len.U
            ar.size                  := cached_size.U // 8 bytes
            arvalid                  := true.B
            rready                   := true.B
            burst.wstrb(replace_way) := 1.U // the first beat goes to bank 0
            tag_wstrb(replace_way)   := true.B
            when(!ptw_working) {
              // normal dcache mode
              // for ar axi
              ar.addr   := Cat(io.cpu.tlb.paddr(PADDR_WID - 1, offsetWidth), 0.U(offsetWidth.W))
              tag_wdata := io.cpu.tlb.ptag
            }.otherwise {
              // replace is borrowed by the ptw
              ar.addr   := Cat(ptw_scratch.paddr.tag, ptw_scratch.paddr.index, 0.U(offsetWidth.W))
              tag_wdata := ptw_scratch.paddr.tag
            }
            when(replace_dirty) {
              // The victim line is dirty and must be written back: back it up
              // first so the read/write timing conflict is avoided.
              (0 until nbank).map(i => bank_wbdata(i) := data(i)(replace_way))
              aw.addr      := Cat(tag(replace_way), replace_index, 0.U(offsetWidth.W))
              aw.len       := cached_len.U
              aw.size      := cached_size.U
              awvalid      := true.B
              w.data       := data(0)(replace_way)
              w.strb       := ~0.U(AXI_STRB_WID.W)
              w.last       := false.B
              wvalid       := true.B
              bank_wbindex := 0.U
            }
          }
        }
      }
    }
    is(s_wait) {
      // Wait for the pipeline's allow_to_go signal so that a request is not issued more than once.
      io.cpu.tlb.ptw.vpn.ready := !ptw_working
      ptw_scratch.dcache_wait  := true.B
      when(io.cpu.complete_single_request) {
        ptw_scratch.dcache_wait := false.B
        access_fault            := false.B // clear access_fault
        page_fault              := false.B // clear page_fault
        state                   := s_idle
      }
    }
    is(s_tlb_refill) {
      io.cpu.tlb.ptw.vpn.ready := !ptw_working
      when(io.cpu.tlb.access_fault) {
        access_fault := true.B
        state        := s_wait
      }.elsewhen(io.cpu.tlb.page_fault) {
        page_fault := true.B
        state      := s_wait
      }.otherwise {
        when(io.cpu.tlb.hit) {
          state := s_idle
        }
      }
    }
  }

  // ==========================================================
  // Page-table walk that refills the TLB
  val satp        = io.cpu.tlb.csr.satp.asTypeOf(satpBundle)
  val mstatus     = io.cpu.tlb.csr.mstatus.asTypeOf(new Mstatus)
  val mode        = Mux(io.cpu.tlb.access_type === AccessType.fetch, io.cpu.tlb.csr.imode, io.cpu.tlb.csr.dmode)
  val sum         = mstatus.sum
  val mxr         = mstatus.mxr
  val vpn         = io.cpu.tlb.ptw.vpn.bits.asTypeOf(vpnBundle)
  val access_type = io.cpu.tlb.ptw.access_type
  val ppn         = RegInit(0.U(ppnLen.W))
  val vpn_index   = RegInit(0.U(log2Up(level).W)) // current level of the page-table walk
  val pte         = RegInit(0.U.asTypeOf(pteBundle)) // leaf page-table entry found by the walk

  io.cpu.tlb.ptw.pte.valid             := false.B
  io.cpu.tlb.ptw.pte.bits              := DontCare
  io.cpu.tlb.ptw.pte.bits.access_fault := false.B
  io.cpu.tlb.ptw.pte.bits.page_fault   := false.B
  io.cpu.tlb.complete_single_request   := io.cpu.complete_single_request
  require(AXI_DATA_WID == XLEN) // only the AXI_DATA_WID == XLEN case is handled for now

  // Reports a page fault to the TLB and returns the walker to the handshake state.
  def raisePageFault(): Unit = {
    io.cpu.tlb.ptw.pte.valid           := true.B
    io.cpu.tlb.ptw.pte.bits.page_fault := true.B
    ptw_state                          := ptw_handshake
  }

  // Checks the PTE's U bit against the current privilege mode (and SUM for S-mode);
  // moves on to ptw_set when the access is allowed.
  def modeCheck(): Unit = {
    switch(mode) {
      is(ModeS) {
        when(pte.flag.u && !sum) {
          raisePageFault()
        }.otherwise {
          ptw_state := ptw_set
        }
      }
      is(ModeU) {
        when(!pte.flag.u) {
          raisePageFault()
        }.otherwise {
          ptw_state := ptw_set
        }
      }
    }
  }

  switch(ptw_state) {
    is(ptw_handshake) { // 0
      // handshake on the virtual page number to be translated
      when(io.cpu.tlb.ptw.vpn.fire) {
        vpn_index := (level - 1).U
        ppn       := satp.ppn
        ptw_state := ptw_send
      }
    }
    is(ptw_send) { // 1
      val vpnn = Mux1H(
        Seq(
          (vpn_index === 0.U) -> vpn.vpn0,
          (vpn_index === 1.U) -> vpn.vpn1,
          (vpn_index === 2.U) -> vpn.vpn2
        )
      )
      val ptw_addr     = paddrApply(ppn, vpnn).asTypeOf(pAddr)
      val pte_uncached = AddressSpace.isMMIO(ptw_addr.asUInt)
      when(pte_uncached) {
        arvalid   := true.B
        ar.addr   := ptw_addr.asUInt
        ar.size   := log2Ceil(AXI_DATA_WID / 8).U // a pte is 8 bytes
        ar.len    := 0.U // a single beat is enough
        rready    := true.B
        ptw_state := ptw_uncached
      }.otherwise {
        bank_raddr := ptw_addr.index
        tagRam.map(_.io.raddr := ptw_addr.index)
        replace_index       := ptw_addr.index
        ptw_state           := ptw_cached
        ptw_scratch.paddr   := ptw_addr
        ptw_scratch.replace := false.B
      }
    }
    is(ptw_cached) { // 2
      bank_raddr := ptw_scratch.paddr.index
      tagRam.map(_.io.raddr := ptw_scratch.paddr.index)
      replace_index := ptw_scratch.paddr.index
      for { i <- 0 until nway } {
        tag_compare_valid(i) :=
          tag(i) === ptw_scratch.paddr.tag && // tag matches
            valid(ptw_scratch.paddr.index)(i) // the line's valid bit is set
      }
      when(!ptw_scratch.replace) {
        when(cache_hit) {
          val pte_temp = data(ptw_scratch.paddr.offset.asTypeOf(bankAddr).index)(select_way).asTypeOf(pteBundle)
          when(!pte_temp.flag.v || !pte_temp.flag.r && pte_temp.flag.w) {
            raisePageFault()
          }.otherwise {
            when(pte_temp.flag.r || pte_temp.flag.x) {
              // found a leaf page
              pte       := pte_temp
              ptw_state := ptw_check
            }.otherwise {
              // this pte points to the next-level page table
              vpn_index := vpn_index - 1.U
              // A pointer PTE at level 0 has no next level. vpn_index is unsigned,
              // so "vpn_index - 1 < 0" can never be true; test for level 0 directly.
              when(vpn_index === 0.U) {
                raisePageFault()
              }.otherwise {
                ppn       := pte_temp.ppn
                ptw_state := ptw_send
              }
            }
          }
        }.otherwise {
          ptw_scratch.replace := true.B
          state               := s_replace // reuse the dcache replace state machine to fetch the line
        }
      }
    }
    is(ptw_uncached) { // 3
      when(io.axi.ar.fire) {
        arvalid := false.B
      }
      when(io.axi.r.fire) {
        rready := false.B
        val pte_temp = io.axi.r.bits.data.asTypeOf(pteBundle)
        when(!pte_temp.flag.v || !pte_temp.flag.r && pte_temp.flag.w) {
          raisePageFault()
        }.otherwise {
          when(pte_temp.flag.r || pte_temp.flag.x) {
            // found a leaf page
            pte       := pte_temp
            ptw_state := ptw_check
          }.otherwise {
            // this pte points to the next-level page table
            vpn_index := vpn_index - 1.U
            // Same unsigned-underflow fix as in ptw_cached: fault when already at level 0.
            when(vpn_index === 0.U) {
              raisePageFault()
            }.otherwise {
              ppn       := pte_temp.ppn
              ptw_state := ptw_send
            }
          }
        }
      }
    }
    is(ptw_check) { // 4
      // permission check
      switch(access_type) {
        is(AccessType.load) {
          when(mxr) {
            when(!pte.flag.r && !pte.flag.x) {
              raisePageFault()
            }.otherwise {
              modeCheck()
            }
          }.otherwise {
            when(!pte.flag.r) {
              raisePageFault()
            }.otherwise {
              modeCheck()
            }
          }
        }
        is(AccessType.store) {
          when(!pte.flag.w) {
            raisePageFault()
          }.otherwise {
            modeCheck()
          }
        }
        is(AccessType.fetch) {
          when(!pte.flag.x) {
            raisePageFault()
          }.otherwise {
            modeCheck()
          }
        }
      }
    }
    is(ptw_set) { // 5
      when(
        // a leaf found above level 0 must have its low ppn fields zero
        vpn_index > 0.U && (
          vpn_index === 1.U && pte.ppn.asTypeOf(ppnBundle).ppn0.orR ||
            vpn_index === 2.U && (pte.ppn.asTypeOf(ppnBundle).ppn1.orR || pte.ppn.asTypeOf(ppnBundle).ppn0.orR)
        )
      ) {
        raisePageFault()
      }.elsewhen(!pte.flag.a || access_type === AccessType.store && !pte.flag.d) {
        raisePageFault() // the accessed and dirty bits are set by software
      }.otherwise {
        // translation succeeded
        val rmask = WireInit(~0.U(maskLen.W))
        io.cpu.tlb.ptw.pte.valid      := true.B
        io.cpu.tlb.ptw.pte.bits.rmask := rmask
        io.cpu.tlb.ptw.pte.bits.entry := pte
        val ppn_set = Wire(ppnBundle)
        when(vpn_index === 2.U) {
          ppn_set.ppn2 := pte.ppn.asTypeOf(ppnBundle).ppn2
          ppn_set.ppn1 := vpn.vpn1
          ppn_set.ppn0 := vpn.vpn0
          rmask        := 0.U
        }.elsewhen(vpn_index === 1.U) {
          ppn_set.ppn2 := pte.ppn.asTypeOf(ppnBundle).ppn2
          ppn_set.ppn1 := pte.ppn.asTypeOf(ppnBundle).ppn1
          ppn_set.ppn0 := vpn.vpn0
          rmask        := Cat(Fill(ppn1Len, true.B), 0.U(ppn0Len.W))
        }.otherwise {
          ppn_set := pte.ppn.asTypeOf(ppnBundle)
        }
        io.cpu.tlb.ptw.pte.bits.entry.ppn := ppn_set.asUInt

        ptw_state := ptw_handshake
      }
    }
  }

  // Elaboration-time dump of the cache geometry.
  println("----------------------------------------")
  println("DCache: ")
  println("nindex: " + nindex)
  println("nbank: " + nbank)
  println("bitsPerBank: " + bitsPerBank)
  println("bankOffsetWidth: " + bankOffsetWidth)
  println("bankIndexWidth: " + bankIndexWidth)
  println("tagWidth: " + tagWidth)
  println("indexWidth: " + indexWidth)
  println("offsetWidth: " + offsetWidth)
  println("----------------------------------------")
}

View File

@ -1,367 +0,0 @@
package cache
import chisel3._
import chisel3.util._
import memory._
import cpu.CacheConfig
import cpu.defines._
import cpu.CpuConfig
import cpu.defines.Const._
/*
整个宽度为PADDR_WID的地址
==========================================================
| tag | index | offset |
| | | bank index | bank offset |
==========================================================
nway nindex
======================================================
| valid | tag | bank 0 | bank 1 | ... | bank n |
| 1 | | | | | |
======================================================
| bank |
| inst 0 | inst 1 | ... | inst n |
| 32 | 32 | ... | 32 |
=====================================
本CPU的实现如下
每个bank分为多个instBlocks每个instBlocks的宽度为AXI_DATA_WID这样能方便的和AXI总线进行交互
RV64实现中AXI_DATA_WID为64所以每个instBlocks可以存储2条指令
而instBlocks的个数会和instFetchNum相关
- 当instFetchNum为4时instBlocks的个数为2
- 当instFetchNum为2时instBlocks的个数为1
读取数据时会将一个bank中的所有instBlocks读取出来然后再将instBlocks中的数据按照偏移量重新排列
这样的设计可以保证一个bank的指令数对应instFetchNum
======================================================
| valid | tag | bank 0 | bank 1 | ... | bank n |
| 1 | | | | | |
======================================================
| bank |
| instBlocks | instBlocks |
| inst 0 | inst 1 | inst 0 | inst 1 |
| 32 | 32 | 32 | 32 |
=====================================
*/
class ICache(cacheConfig: CacheConfig)(implicit cpuConfig: CpuConfig) extends Module with HasTlbConst {
val nway = cacheConfig.nway
val nindex = cacheConfig.nindex
val nbank = cacheConfig.nbank
val instFetchNum = cpuConfig.instFetchNum
val bankOffsetWidth = cacheConfig.bankOffsetWidth
val bankIndexWidth = cacheConfig.offsetWidth - bankOffsetWidth
val bytesPerBank = cacheConfig.bytesPerBank
val tagWidth = cacheConfig.tagWidth
val indexWidth = cacheConfig.indexWidth
val offsetWidth = cacheConfig.offsetWidth
val bitsPerBank = cacheConfig.bitsPerBank
def pAddr = new Bundle {
val tag = UInt(ppnLen.W)
val index = UInt(indexWidth.W)
val offset = UInt(offsetWidth.W)
}
def bankAddr = new Bundle {
val index = UInt(bankIndexWidth.W)
val offset = UInt(bankOffsetWidth.W)
}
val io = IO(new Bundle {
val cpu = Flipped(new Cache_ICache())
val axi = new ICache_AXIInterface()
})
require(isPow2(instFetchNum), "ninst must be power of 2")
require(instFetchNum == bytesPerBank / 4, "instFetchNum must equal to instperbank")
require(
bitsPerBank >= AXI_DATA_WID && bitsPerBank % AXI_DATA_WID == 0,
"bitsPerBank must be greater than AXI_DATA_WID"
)
// 一个bank是bitsPerBank宽度一个bank中有instFetchNum个指令
// 每个bank中指令块的个数一个指令块是AXI_DATA_WID宽度
val instBlocksPerBank = bitsPerBank / AXI_DATA_WID
val bank_index = io.cpu.addr(0)(offsetWidth - 1, bankOffsetWidth)
val bank_offset = io.cpu.addr(0)(bankOffsetWidth - 1, log2Ceil(INST_WID / 8)) // PC低2位必定是0
// * fsm * //
val s_idle :: s_uncached :: s_replace :: s_wait :: s_fence :: s_tlb_refill :: Nil = Enum(6)
val state = RegInit(s_idle)
// nway 每路 nindex 每行 nbank bank每行的nbank共用一个valid
val valid = RegInit(VecInit(Seq.fill(nway)(VecInit(Seq.fill(nindex)(false.B)))))
// * should choose next addr * //
val use_next_addr = (state === s_idle) || (state === s_wait)
// 读取一个cache条目中的所有bank行
val data = Wire(Vec(nway, Vec(nbank, Vec(instBlocksPerBank, UInt(AXI_DATA_WID.W)))))
val data_rindex = io.cpu.addr(use_next_addr)(indexWidth + offsetWidth - 1, offsetWidth)
val tag = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W))))
val tag_raddr = io.cpu.addr(use_next_addr)(indexWidth + offsetWidth - 1, offsetWidth)
val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B)))
val tag_wdata = RegInit(0.U(tagWidth.W))
// * lru * //// TODO:检查lru的正确性增加可拓展性目前只支持两路的cache
val lru = RegInit(VecInit(Seq.fill(nindex)(false.B)))
val replace_index = io.cpu.addr(0)(indexWidth + offsetWidth - 1, offsetWidth)
// 需要替换的路号
val replace_way = lru(replace_index)
// 用于控制写入一行cache条目中的哪个bank, 一个bank可能有多次写入
val replace_wstrb = RegInit(
VecInit(Seq.fill(nway)(VecInit(Seq.fill(nbank)(VecInit(Seq.fill(instBlocksPerBank)((false.B)))))))
)
// * cache hit * //
val tag_compare_valid = VecInit(Seq.tabulate(nway)(i => tag(i) === io.cpu.tlb.ptag && valid(i)(replace_index)))
val cache_hit = tag_compare_valid.contains(true.B)
val cache_hit_available = cache_hit && io.cpu.tlb.hit && !io.cpu.tlb.uncached
val select_way = tag_compare_valid(1) // 1路命中时值为10路命中时值为0 //TODO:支持更多路数
// 将一个 bank 中的指令分成 instFetchNum 每份 INST_WID bit
val inst_in_bank = VecInit(
Seq.tabulate(instFetchNum)(i => data(select_way)(bank_index).asUInt((i + 1) * INST_WID - 1, i * INST_WID))
)
// inst_in_bank 中的指令按照 bank_offset 位偏移量重新排列
// 处理偏移导致的跨 bank 读取
// 当offset为0时不需要重新排列
// 当offset为1时此时发送到cpu的inst0应该是inst1inst1应该无数据并设置对应的valid
val inst = VecInit(
Seq.tabulate(instFetchNum)(i =>
Mux(
i.U <= ((instFetchNum - 1).U - bank_offset),
inst_in_bank(i.U + bank_offset),
0.U
)
)
)
val inst_valid = VecInit(
Seq.tabulate(instFetchNum)(i => cache_hit_available && i.U <= ((instFetchNum - 1).U - bank_offset))
)
val rdata_in_wait = RegInit(VecInit(Seq.fill(instFetchNum)(0.U.asTypeOf(new Bundle {
val inst = UInt(INST_WID.W)
val valid = Bool()
}))))
// 对于可缓存段访存时读取的数据宽度应该和AXI_DATA的宽度相同
val cached_size = log2Ceil(AXI_DATA_WID / 8)
val cached_len = (nbank * instBlocksPerBank - 1)
// 对于不可缓存段访存时读取的数据宽度应该和指令宽度相同
val uncached_size = log2Ceil(INST_WID / 8)
val uncached_len = 0
// bank tag ram
for { i <- 0 until nway } {
// 每一个条目中有nbank个bank每个bank存储instFetchNum个指令
// 每次写入cache时将写完一整个cache行
val bank =
Seq.fill(nbank)(
Seq.fill(instBlocksPerBank)(
Module(new SimpleDualPortRam(depth = nindex, width = AXI_DATA_WID, byteAddressable = false))
)
)
for { j <- 0 until nbank } {
for { k <- 0 until instBlocksPerBank } {
bank(j)(k).io.ren := true.B
bank(j)(k).io.raddr := data_rindex
data(i)(j)(k) := bank(j)(k).io.rdata
bank(j)(k).io.wen := replace_wstrb(i)(j)(k)
bank(j)(k).io.waddr := replace_index
bank(j)(k).io.wdata := io.axi.r.bits.data
bank(j)(k).io.wstrb := replace_wstrb(i)(j)(k)
}
}
}
for { i <- 0 until instFetchNum } {
io.cpu.inst_valid(i) := Mux(state === s_idle, inst_valid(i), rdata_in_wait(i).valid) && io.cpu.req
io.cpu.inst(i) := Mux(state === s_idle, inst(i), rdata_in_wait(i).inst)
}
for { i <- 0 until nway } {
// 实例化了nway个tag ram
val tagBram = Module(new LUTRam(nindex, tagWidth))
tagBram.io.raddr := tag_raddr
tag(i) := tagBram.io.rdata
tagBram.io.wen := tag_wstrb(i)
tagBram.io.waddr := replace_index
tagBram.io.wdata := tag_wdata
}
io.cpu.icache_stall := Mux(state === s_idle, (!cache_hit_available && io.cpu.req), state =/= s_wait)
io.cpu.tlb.vaddr := io.cpu.addr(0)
io.cpu.tlb.complete_single_request := io.cpu.complete_single_request
io.cpu.tlb.en := io.cpu.req && (state === s_idle || state === s_tlb_refill)
val ar = RegInit(0.U.asTypeOf(new AR()))
val arvalid = RegInit(false.B)
ar <> io.axi.ar.bits
arvalid <> io.axi.ar.valid
val r = RegInit(0.U.asTypeOf(new R()))
val rready = RegInit(false.B)
r <> io.axi.r.bits
rready <> io.axi.r.ready
val access_fault = RegInit(false.B)
val page_fault = RegInit(false.B)
val addr_misaligned = RegInit(false.B)
// sv39的63-39位不与第38位相同或者地址未对齐时地址错
val addr_err =
io.cpu
.addr(use_next_addr)(XLEN - 1, VADDR_WID)
.asBools
.map(_ =/= io.cpu.addr(use_next_addr)(VADDR_WID - 1))
.reduce(_ || _) ||
io.cpu.addr(use_next_addr)(log2Ceil(INST_WID / 8) - 1, 0).orR
io.cpu.access_fault := access_fault //TODO实现cached段中的访存response错误
io.cpu.page_fault := page_fault
io.cpu.addr_misaligned := addr_misaligned
switch(state) {
is(s_idle) {
access_fault := false.B // 在idle时清除access_fault
page_fault := false.B // 在idle时清除page_fault
addr_misaligned := false.B // 在idle时清除addr_misaligned
when(io.cpu.req) {
when(addr_err) {
when(io.cpu.addr(use_next_addr)(log2Ceil(INST_WID / 8) - 1, 0).orR) {
addr_misaligned := true.B
}.otherwise {
access_fault := true.B
}
state := s_wait
rdata_in_wait(0).inst := Instructions.NOP
rdata_in_wait(0).valid := true.B
}.elsewhen(!io.cpu.tlb.hit) {
state := s_tlb_refill
}.elsewhen(io.cpu.tlb.uncached) {
state := s_uncached
ar.addr := io.cpu.tlb.paddr
ar.len := uncached_len.U
ar.size := uncached_size.U
arvalid := true.B
}.elsewhen(!cache_hit) {
state := s_replace
// 取指时按bank块取指
ar.addr := Cat(io.cpu.tlb.paddr(PADDR_WID - 1, offsetWidth), 0.U(offsetWidth.W))
ar.len := cached_len.U
ar.size := cached_size.U
arvalid := true.B
replace_wstrb(replace_way).map(_.map(_ := false.B))
replace_wstrb(replace_way)(0)(0) := true.B // 从第一个bank的第一个指令块开始写入
tag_wstrb(replace_way) := true.B
tag_wdata := io.cpu.tlb.ptag
valid(replace_way)(replace_index) := true.B
}.elsewhen(!io.cpu.icache_stall) {
replace_way := ~select_way
when(!io.cpu.complete_single_request) {
state := s_wait
(1 until instFetchNum).foreach(i => rdata_in_wait(i).inst := inst(i))
(0 until instFetchNum).foreach(i => rdata_in_wait(i).valid := inst_valid(i))
}
}
}
}
is(s_uncached) {
when(io.axi.ar.valid) {
when(io.axi.ar.ready) {
arvalid := false.B
rready := true.B
}
}.elsewhen(io.axi.r.fire) {
// * uncached not support burst transport * //
rdata_in_wait(0).inst := Mux(ar.addr(2), io.axi.r.bits.data(63, 32), io.axi.r.bits.data(31, 0))
rdata_in_wait(0).valid := true.B
rready := false.B
access_fault := io.axi.r.bits.resp =/= RESP_OKEY.U
state := s_wait
}
}
is(s_replace) {
when(io.axi.ar.valid) {
when(io.axi.ar.ready) {
arvalid := false.B
rready := true.B
}
}.elsewhen(io.axi.r.fire) {
// * burst transport * //
when(!io.axi.r.bits.last) {
// 左移写掩码写入下一个bank或是同一个bank的下一个指令
replace_wstrb(replace_way) :=
((replace_wstrb(replace_way).asUInt << 1)).asTypeOf(replace_wstrb(replace_way))
}.otherwise {
rready := false.B
replace_wstrb(replace_way).map(_.map(_ := false.B))
tag_wstrb(replace_way) := false.B
}
}.elsewhen(!io.axi.r.ready) {
state := s_idle
}
}
is(s_wait) {
// 等待流水线的allow_to_go信号防止多次发出读请求
when(io.cpu.complete_single_request) {
access_fault := false.B // 清除access_fault
page_fault := false.B // 清除page_fault
addr_misaligned := false.B // 清除addr_misaligned
state := s_idle
(0 until instFetchNum).foreach(i => rdata_in_wait(i).valid := false.B)
}
}
is(s_fence) {
// 等待dcache完成写回操作且等待axi总线完成读取操作因为icache发生状态转移时可能正在读取数据
when(!io.cpu.dcache_stall && !io.axi.r.valid) {
state := s_idle
}
}
is(s_tlb_refill) {
when(io.cpu.tlb.access_fault) {
access_fault := true.B
state := s_wait
rdata_in_wait(0).inst := Instructions.NOP
rdata_in_wait(0).valid := true.B
}.elsewhen(io.cpu.tlb.page_fault) {
page_fault := true.B
state := s_wait
rdata_in_wait(0).inst := Instructions.NOP
rdata_in_wait(0).valid := true.B
}.otherwise {
when(io.cpu.tlb.hit) {
state := s_idle
}
}
}
}
// * fence * //
// 不论icache在什么状态fence指令优先度最高会强制将icache状态转移为s_fence
when(io.cpu.fence_i) {
valid := 0.U.asTypeOf(valid) // fence.i指令需要icache等同于将所有valid位置0
state := s_fence
}
println("----------------------------------------")
println("ICache: ")
println("nindex: " + nindex)
println("nbank: " + nbank)
println("bankOffsetWidth: " + bankOffsetWidth)
println("bytesPerBank: " + bytesPerBank)
println("tagWidth: " + tagWidth)
println("indexWidth: " + indexWidth)
println("offsetWidth: " + offsetWidth)
println("----------------------------------------")
}

View File

@ -1,64 +0,0 @@
package cache.memory
import chisel3._
import chisel3.util._
import cpu.CpuConfig
/** Small RAM with one write port and one independent read port, meant to map onto
  * distributed (LUT) memory.
  *
  * When `cpuConfig.build` is set the storage is the [[LUTRamIP]] black box; otherwise a
  * register-array model with the same ports is elaborated instead.
  *
  * @param depth
  *   number of lines in the memory; must be a power of two
  * @param width
  *   width of each line, in bits
  * @param cpuConfig
  *   implicit configuration; its `build` flag selects the black box or the register model
  */
class LUTRam(depth: Int, width: Int)(implicit val cpuConfig: CpuConfig) extends Module {
  require(isPow2(depth))

  // Address width shared by the read and the write port.
  val waddridth = log2Ceil(depth)

  val io = IO(new Bundle {
    val raddr       = Input(UInt(waddridth.W))
    val rdata       = Output(UInt(width.W))
    val waddr       = Input(UInt(waddridth.W))
    val wdata       = Input(UInt(width.W))
    val wen         = Input(Bool())
    val writeOutput = Output(UInt(width.W))
  })

  // Default for both variants; only the register model below ever overrides it.
  io.writeOutput := DontCare

  if (!cpuConfig.build) {
    // Register model: combinational read, write performed on the clock edge.
    val bank = RegInit(VecInit(Seq.fill(depth)(0.U(width.W))))
    io.rdata := bank(io.raddr)
    when(!io.wen) {
      // No write this cycle: expose the line currently addressed by the write port.
      io.writeOutput := bank(io.waddr)
    }.otherwise {
      bank(io.waddr) := io.wdata
    }
  } else {
    // The whole line is one write unit, hence byteWriteWidth == width.
    val bank = Module(
      new LUTRamIP(
        wdataidth      = width,
        waddridth      = waddridth,
        byteWriteWidth = width,
        numberOfLines  = depth
      )
    )

    // Clocks, resets and enables: both ports run from this module's clock and reset.
    bank.io.clka   := clock
    bank.io.clkb   := clock
    bank.io.rsta   := reset
    bank.io.rstb   := reset
    bank.io.regcea := false.B
    bank.io.regceb := false.B
    bank.io.ena    := true.B
    bank.io.enb    := true.B

    // Port A carries the writes (its read output `douta` is left unused).
    bank.io.addra := io.waddr
    bank.io.wea   := io.wen
    bank.io.dina  := io.wdata

    // Port B carries the reads.
    bank.io.addrb := io.raddr
    io.rdata      := bank.io.doutb
  }
}

View File

@ -1,65 +0,0 @@
package cache.memory
import chisel3._
import chisel3.util.log2Ceil
/** Black box for the Xilinx `xpm_memory_dpdistram` macro (the original header cites XPM 2019.2,
  * UG953 page 124, and notes that the memory is initialised to all zeros by default).
  *
  * Both read latencies are configured as 0 and both ports share one clock
  * (`CLOCKING_MODE = common_clock`).
  *
  * @param wdataidth
  *   width of each stored line, in bits
  * @param waddridth
  *   width of the address ports; must equal `log2Ceil(numberOfLines)` and be at most 20
  * @param byteWriteWidth
  *   number of data bits controlled by one write-enable bit; must divide `wdataidth`
  * @param numberOfLines
  *   number of lines in the memory (the `MEMORY_SIZE` parameter, in bits, is
  *   `numberOfLines * wdataidth`)
  */
class LUTRamIP(wdataidth: Int, waddridth: Int, byteWriteWidth: Int, numberOfLines: Int)
    extends BlackBox(
      Map(
        "ADDR_WIDTH_A"       -> waddridth,
        "ADDR_WIDTH_B"       -> waddridth,
        "MEMORY_SIZE"        -> numberOfLines * wdataidth,
        "WRITE_DATA_WIDTH_A" -> wdataidth,
        "READ_DATA_WIDTH_A"  -> wdataidth,
        "READ_DATA_WIDTH_B"  -> wdataidth,
        "BYTE_WRITE_WIDTH_A" -> byteWriteWidth,
        "READ_LATENCY_A"     -> 0,
        "READ_LATENCY_B"     -> 0,
        "READ_RESET_VALUE_A" -> 0,
        "READ_RESET_VALUE_B" -> 0,
        "CLOCKING_MODE"      -> "common_clock"
      )
    ) {
  // The emitted module name must be the XPM macro name.
  override def desiredName: String = "xpm_memory_dpdistram"

  require(
    waddridth == log2Ceil(numberOfLines),
    "request width should be log 2 of number of lines to request all"
  )
  require(
    wdataidth % byteWriteWidth == 0,
    "data width should be a multiple of byte write width"
  )
  // NOTE(review): the message mentions a lower bound of 1, but only the upper bound is checked.
  require(waddridth <= 20, "request width should be 1 to 20")

  val io = IO(new Bundle {
    val clka   = Input(Clock())
    val clkb   = Input(Clock())
    val rsta   = Input(Reset())
    val rstb   = Input(Reset())
    val ena    = Input(Bool())
    val enb    = Input(Bool())
    val regcea = Input(Bool())
    val regceb = Input(Bool())
    val dina   = Input(UInt(wdataidth.W))
    val addra  = Input(UInt(waddridth.W))
    val addrb  = Input(UInt(waddridth.W))
    // One write-enable bit per byteWriteWidth-sized slice of the line.
    val wea    = Input(UInt((wdataidth / byteWriteWidth).W))
    val douta  = Output(UInt(wdataidth.W))
    val doutb  = Output(UInt(wdataidth.W))
  })
}

View File

@ -1,37 +0,0 @@
package cache.memory
import chisel3._
import chisel3.util._
import cpu.CacheConfig
/** Read-only memory port: an address in, `gen`-typed data out.
  *
  * @param gen
  *   hardware type of the data read through the port
  * @param cacheConfig
  *   implicit cache geometry; the address covers `nindex * nbank` locations
  */
class ReadOnlyPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  // Enough bits to address every (index, bank) location.
  private val addrBits = log2Ceil(cacheConfig.nindex * cacheConfig.nbank)

  val addr = Input(UInt(addrBits.W))
  val data = Output(gen)
}
/** Write-only memory port with a single write-enable bit.
  *
  * @param gen
  *   hardware type of the data written through the port
  * @param cacheConfig
  *   implicit cache geometry; the address covers `nindex * nbank` locations
  */
class WriteOnlyPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  // Enough bits to address every (index, bank) location.
  private val addrBits = log2Ceil(cacheConfig.nindex * cacheConfig.nbank)

  val addr = Input(UInt(addrBits.W))
  val en   = Input(Bool())
  val data = Input(gen)
}
/** Write-only memory port whose enable is a mask that is `bytesPerBank` bits wide.
  *
  * @param gen
  *   hardware type of the data written through the port
  * @param cacheConfig
  *   implicit cache geometry; supplies the address range and the mask width
  */
class WriteOnlyMaskPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  // Enough bits to address every (index, bank) location.
  private val addrBits = log2Ceil(cacheConfig.nindex * cacheConfig.nbank)

  val addr = Input(UInt(addrBits.W))
  val en   = Input(UInt(cacheConfig.bytesPerBank.W))
  val data = Input(gen)
}
/** Combined read/write memory port sharing one address, with a single enable bit.
  *
  * @param gen
  *   hardware type of both the written and the read data
  * @param cacheConfig
  *   implicit cache geometry; the address covers `nindex * nbank` locations
  */
class ReadWritePort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  // Enough bits to address every (index, bank) location.
  private val addrBits = log2Ceil(cacheConfig.nindex * cacheConfig.nbank)

  val addr  = Input(UInt(addrBits.W))
  val en    = Input(Bool())
  val wdata = Input(gen)
  val rdata = Output(gen)
}
/** Combined read/write memory port sharing one address, with a `bytesPerBank`-bit write mask.
  *
  * @param gen
  *   hardware type of both the written and the read data
  * @param cacheConfig
  *   implicit cache geometry; supplies the address range and the mask width
  */
class MaskedReadWritePort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  // Enough bits to address every (index, bank) location.
  private val addrBits = log2Ceil(cacheConfig.nindex * cacheConfig.nbank)

  val addr      = Input(UInt(addrBits.W))
  val writeMask = Input(UInt(cacheConfig.bytesPerBank.W))
  val wdata     = Input(gen)
  val rdata     = Output(gen)
}

View File

@ -1,96 +0,0 @@
package cache.memory
import chisel3._
import chisel3.stage.{ChiselGeneratorAnnotation, ChiselStage}
import chisel3.util._
import cpu.CpuConfig
/** Simple dual-port RAM: one read port and one write port.
  *
  * When `cpuConfig.build` is set the storage is the [[SimpleDualPortRamIP]] black box; otherwise
  * a `SyncReadMem` model is elaborated for simulation.
  *
  * @param depth
  *   number of lines in the RAM; must be a power of two
  * @param width
  *   width of each line, in bits; must be a multiple of 8 when `byteAddressable` is set
  * @param byteAddressable
  *   when true `wstrb` carries one write-enable bit per byte, otherwise a single bit
  * @param cpuConfig
  *   implicit configuration; its `build` flag selects the black box or the simulation model
  */
class SimpleDualPortRam(
  depth:           Int,
  width:           Int,
  byteAddressable: Boolean
)(
  implicit
  val cpuConfig: CpuConfig)
    extends Module {
  require(isPow2(depth))
  // The check is on the data width (the message previously said "address width").
  require(
    width % 8 == 0 || !byteAddressable,
    "if memory is byte addressable, then the data width must be a multiple of 8"
  )

  // Address width shared by the read and the write port.
  val waddridth = log2Ceil(depth)

  val io = IO(new Bundle {
    val raddr = Input(UInt(waddridth.W))
    val ren   = Input(Bool())
    val rdata = Output(UInt(width.W))

    val waddr = Input(UInt(waddridth.W))
    val wen   = Input(Bool())
    val wstrb = Input(UInt((if (byteAddressable) width / 8 else 1).W))
    val wdata = Input(UInt(width.W))
  })

  if (cpuConfig.build) {
    val memory = Module(
      new SimpleDualPortRamIP(
        wdataidth      = width,
        byteWriteWidth = if (byteAddressable) 8 else width,
        numberOfLines  = depth,
        waddridth      = waddridth
      )
    )
    memory.io.clka := clock
    memory.io.clkb := clock
    memory.io.rstb := reset

    // Port A is the write port.
    memory.io.addra := io.waddr
    memory.io.ena   := io.wen
    memory.io.dina  := io.wdata
    memory.io.wea   := io.wstrb

    // Port B is the read port.
    memory.io.addrb  := io.raddr
    memory.io.enb    := io.ren
    memory.io.regceb := false.B

    io.rdata := memory.io.doutb
  } else {
    // Simulation-only sanity check: a write with an all-zero strobe is treated as an error.
    assert(
      io.wstrb.orR || !io.wen,
      "when write port enable is high, write vector cannot be all 0"
    )

    if (byteAddressable) {
      val bank = SyncReadMem(depth, Vec(width / 8, UInt(8.W)))
      when(io.ren) {
        io.rdata := bank.read(io.raddr).asTypeOf(UInt(width.W))
      }.otherwise {
        io.rdata := DontCare
      }
      when(io.wen) {
        bank.write(io.waddr, io.wdata.asTypeOf(Vec(width / 8, UInt(8.W))), io.wstrb.asBools)
      }
    } else {
      val bank = SyncReadMem(depth, UInt(width.W))
      when(io.ren) {
        io.rdata := bank.read(io.raddr)
      }.otherwise {
        // Zero sized to the configured line width rather than a fixed 32 bits.
        io.rdata := 0.U(width.W)
      }
      when(io.wen) {
        bank.write(io.waddr, io.wdata)
      }
    }
  }
}

View File

@ -1,68 +0,0 @@
package cache.memory
import chisel3._
import chisel3.util.log2Ceil
/** Black box for the Xilinx `xpm_memory_sdpram` macro: port A writes, port B reads with a
  * latency of one cycle, both on a common clock.
  *
  * @param wdataidth
  *   width of each data line, in bits
  * @param byteWriteWidth
  *   number of data bits controlled by one write-enable bit; must divide `wdataidth`
  * @param numberOfLines
  *   number of data lines in the RAM
  * @param waddridth
  *   width of the address ports; must equal `log2Ceil(numberOfLines)` and be at most 20
  * @param memoryPrimitive
  *   which primitive to target: "auto", "block", "distributed" or "ultra"
  */
class SimpleDualPortRamIP(
  wdataidth:       Int = 32,
  byteWriteWidth:  Int = 8,
  numberOfLines:   Int,
  waddridth:       Int,
  memoryPrimitive: String = "block"
) extends BlackBox(
      Map(
        "ADDR_WIDTH_A"       -> waddridth,
        "ADDR_WIDTH_B"       -> waddridth,
        "WRITE_DATA_WIDTH_A" -> wdataidth,
        "READ_DATA_WIDTH_B"  -> wdataidth,
        "BYTE_WRITE_WIDTH_A" -> byteWriteWidth,
        "CLOCKING_MODE"      -> "common_clock",
        "READ_LATENCY_B"     -> 1,
        "MEMORY_SIZE"        -> numberOfLines * wdataidth,
        "MEMORY_PRIMITIVE"   -> memoryPrimitive
      )
    ) {
  // The emitted module name must be the XPM macro name.
  override def desiredName: String = "xpm_memory_sdpram"

  // NOTE(review): the message mentions a lower bound of 1, but only the upper bound is checked.
  require(waddridth <= 20, "request width should be 1 to 20")
  require(
    wdataidth % byteWriteWidth == 0,
    "data width should be a multiple of byte write width"
  )
  require(
    Set("auto", "block", "distributed", "ultra").contains(memoryPrimitive),
    "memory primitive should be auto, block ram, dist ram or ultra ram"
  )
  require(
    waddridth == log2Ceil(numberOfLines),
    "request width should be log 2 of number of lines to request all"
  )

  val io = IO(new Bundle {
    // Clocks and reset.
    val clka = Input(Clock())
    val clkb = Input(Clock())
    val rstb = Input(Reset())

    // Port A: write side; one `wea` bit per byteWriteWidth-sized slice of the line.
    val addra = Input(UInt(waddridth.W))
    val dina  = Input(UInt(wdataidth.W))
    val ena   = Input(Bool())
    val wea   = Input(UInt((wdataidth / byteWriteWidth).W))

    // Port B: read side.
    val addrb  = Input(UInt(waddridth.W))
    val enb    = Input(Bool())
    val regceb = Input(Bool())
    val doutb  = Output(UInt(wdataidth.W))
  })
}

View File

@ -1,428 +0,0 @@
package icache.mmu
import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
import cpu.CacheConfig
import cpu.pipeline.execute.CsrTlb
import cpu.CpuConfig
/** Two-bit encoding of the kind of memory access being translated. */
object AccessType {
  def apply() = UInt(2.W) // hardware type used to carry an access kind
  def fetch = "b00".U // instruction fetch
  def load = "b01".U // data load
  def store = "b10".U // data store
}
/** Request/response channel between the TLB and the page-table walker.
  *
  * `vpn` is the request (a decoupled virtual page number) and `pte` is the flipped, decoupled
  * response carrying the fault flags, the entry and its mask.
  */
class Tlb_Ptw extends Bundle with HasTlbConst {
  val vpn = Decoupled(UInt(vpnLen.W)) // virtual page number to walk
  val access_type = Output(AccessType()) // kind of access that triggered the walk
  val pte = Flipped(Decoupled(new Bundle {
    val access_fault = Bool()
    val page_fault = Bool()
    val entry = pteBundle
    val rmask = UInt(maskLen.W) // NOTE(review): presumably the superpage mask consumed by vpnEq/maskTag — confirm
  }))
}
/** Translation port as seen from the TLB side: a cache supplies a virtual address and receives
  * the translation result and fault flags.
  */
class Tlb_ICache extends Bundle with HasTlbConst {
  val en = Input(Bool()) // lookup enable
  val vaddr = Input(UInt(XLEN.W)) // virtual address to translate
  val complete_single_request = Input(Bool()) // tells the TLB the current request has finished
  val uncached = Output(Bool()) // address must bypass the cache
  val hit = Output(Bool()) // translation available and permitted this cycle
  val ptag = Output(UInt(cacheTagLen.W)) // physical tag
  val paddr = Output(UInt(PADDR_WID.W)) // full physical address
  val access_fault = Output(Bool())
  val page_fault = Output(Bool())
}
/** Data-side translation port: everything in [[Tlb_ICache]] plus the access kind, the
  * page-table-walker channel and the CSR view.
  */
class Tlb_DCache extends Tlb_ICache {
  val access_type = Input(AccessType()) // kind of data access being translated
  // Signals related to the page-table walker.
  val ptw = new Tlb_Ptw()
  val csr = new CsrTlb()
}
/** Address-translation unit shared by the instruction and data caches.
  *
  * Each side has a single-entry first-level TLB (`itlb` / `dtlb`); both share the second-level
  * array `tlbl2`. A miss in both levels raises a request to the page-table walker through
  * `io.dcache.ptw`. Each side is driven by its own four-state machine
  * (`search_l1`, `search_l2`, `search_pte`, `search_fault`), and `io.sfence_vma` invalidates
  * entries by address and/or ASID.
  */
class Tlb extends Module with HasTlbConst with HasCSRConst {
  val io = IO(new Bundle {
    val icache     = new Tlb_ICache()
    val dcache     = new Tlb_DCache()
    val csr        = Flipped(new CsrTlb())
    val sfence_vma = Input(new MouTlb())
  })

  val satp    = io.csr.satp.asTypeOf(satpBundle)
  val mstatus = io.csr.mstatus.asTypeOf(new Mstatus)
  val imode   = io.csr.imode
  val dmode   = io.csr.dmode

  // When SUM=0, S-mode accesses to pages that are accessible to U-mode (U=1) fault.
  // When SUM=1, those accesses are permitted. SUM has no effect when page-based virtual memory
  // is not in effect. Note that although SUM is normally ignored outside S-mode, it does apply
  // when MPRV=1 and MPP=S.
  val sum = mstatus.sum
  // When MXR=0, only loads from pages marked readable (R=1) succeed.
  // When MXR=1, loads from pages marked readable or executable (R=1 or X=1) succeed.
  // MXR has no effect when page-based virtual memory is not in effect.
  val mxr = mstatus.mxr

  // Virtual memory is enabled only when satp.mode is 8 and the current mode is below M-mode.
  val ivm_enabled = (satp.mode === 8.U) && (imode < ModeM)
  val dvm_enabled = (satp.mode === 8.U) && (dmode < ModeM)

  val itlb  = RegInit(0.U.asTypeOf(tlbBundle))
  val dtlb  = RegInit(0.U.asTypeOf(tlbBundle))
  val tlbl2 = RegInit(VecInit(Seq.fill(cpuConfig.tlbEntries)(0.U.asTypeOf(tlbBundle))))

  val ivpn = io.icache.vaddr(VADDR_WID - 1, pageOffsetLen)
  val dvpn = io.dcache.vaddr(VADDR_WID - 1, pageOffsetLen)

  // A TLB entry hits when the VPN matches, the ASID matches (or PTE.G is 1), and PTE.V is 1.
  val itlbl1_hit = vpnEq(itlb.rmask, ivpn, itlb.vpn) &&
    (itlb.asid === satp.asid || itlb.flag.g) &&
    itlb.flag.v
  val dtlbl1_hit = vpnEq(dtlb.rmask, dvpn, dtlb.vpn) &&
    (dtlb.asid === satp.asid || dtlb.flag.g) &&
    dtlb.flag.v

  val il2_hit_vec = VecInit(
    tlbl2.map(tlb =>
      vpnEq(tlb.rmask, ivpn, tlb.vpn) &&
        (tlb.asid === satp.asid || tlb.flag.g) &&
        tlb.flag.v
    )
  )
  val dl2_hit_vec = VecInit(
    tlbl2.map(tlb =>
      vpnEq(tlb.rmask, dvpn, tlb.vpn) &&
        (tlb.asid === satp.asid || tlb.flag.g) &&
        tlb.flag.v
    )
  )

  val search_l1 :: search_l2 :: search_pte :: search_fault :: Nil = Enum(4)
  val immu_state = RegInit(search_l1)
  val dmmu_state = RegInit(search_l1)

  // Victim pointer for second-level refills: a counter advanced on every refill
  // (the original note describes this as "random" replacement).
  val replace_index = new Counter(cpuConfig.tlbEntries)

  val ipage_fault   = RegInit(false.B)
  val dpage_fault   = RegInit(false.B)
  val iaccess_fault = RegInit(false.B)
  val daccess_fault = RegInit(false.B)

  // Page-table-walk request flags: element 0 is the instruction side, element 1 the data side.
  val req_ptw = WireInit(VecInit(Seq.fill(2)(false.B)))

  val ar_sel_lock = RegInit(false.B)
  val ar_sel_val  = RegInit(false.B)
  // Data-side requests win by default; once a request is pending the choice is held.
  val choose_icache = Mux(ar_sel_lock, ar_sel_val, req_ptw(0) && !req_ptw(1))

  // Hold the arbitration decision while a walk request is valid but not yet accepted.
  when(io.dcache.ptw.vpn.valid) {
    when(io.dcache.ptw.vpn.ready) {
      ar_sel_lock := false.B
    }.otherwise {
      ar_sel_lock := true.B
      ar_sel_val  := choose_icache
    }
  }

  // Defaults; the state machines below override `hit`.
  io.icache.hit          := false.B
  io.dcache.hit          := false.B
  io.icache.access_fault := iaccess_fault
  io.dcache.access_fault := daccess_fault
  io.icache.page_fault   := ipage_fault
  io.dcache.page_fault   := dpage_fault

  // The page-table walker lives inside the dcache and uses the dcache's AXI port for memory.
  io.dcache.ptw.vpn.valid   := Mux(choose_icache, req_ptw(0), req_ptw(1))
  io.dcache.ptw.access_type := Mux(choose_icache, AccessType.fetch, io.dcache.access_type)
  io.dcache.ptw.vpn.bits    := Mux(choose_icache, ivpn, dvpn)
  io.dcache.ptw.pte.ready   := true.B // always ready
  io.dcache.csr <> io.csr

  /** Privilege check for the instruction side: raises a page fault or reports a hit. */
  def imodeCheck(): Unit = {
    switch(imode) {
      is(ModeS) {
        when(itlb.flag.u && sum === 0.U) {
          ipage_fault := true.B
          immu_state  := search_fault
        }.otherwise {
          io.icache.hit := true.B
        }
      }
      is(ModeU) {
        when(!itlb.flag.u) {
          ipage_fault := true.B
          immu_state  := search_fault
        }.otherwise {
          io.icache.hit := true.B
        }
      }
    }
  }

  /** Privilege check for the data side: raises a page fault or reports a hit. */
  def dmodeCheck(): Unit = {
    switch(dmode) {
      is(ModeS) {
        when(dtlb.flag.u && sum === 0.U) {
          dpage_fault := true.B
          dmmu_state  := search_fault
        }.otherwise {
          io.dcache.hit := true.B
        }
      }
      is(ModeU) {
        when(!dtlb.flag.u) {
          dpage_fault := true.B
          dmmu_state  := search_fault
        }.otherwise {
          io.dcache.hit := true.B
        }
      }
    }
  }

  // ---------------------------------------------------
  // --------- instruction address translation ---------
  // ---------------------------------------------------
  switch(immu_state) {
    is(search_l1) {
      when(io.icache.en) {
        // PMA and PMP permission checks for the access are implemented in the icache.
        ipage_fault   := false.B
        iaccess_fault := false.B
        when(!ivm_enabled) {
          io.icache.hit := true.B
        }.elsewhen(itlbl1_hit) {
          // All permission checks required for an instruction fetch happen here:
          // 0. X bit: only executable pages may be fetched from
          // 1. M-mode never gets here because vm_enabled is false
          // 2. S-mode: if the U bit is 1, SUM must be checked
          // 3. U-mode: the U bit must be 1
          io.icache.hit := false.B // only set to true once the permission checks pass
          when(!itlb.flag.x) {
            ipage_fault := true.B
            immu_state  := search_fault
          }.otherwise {
            imodeCheck()
          }
        }.otherwise {
          immu_state := search_l2
        }
      }
    }
    is(search_l2) {
      when(il2_hit_vec.asUInt.orR) {
        immu_state := search_l1
        itlb       := tlbl2(PriorityEncoder(il2_hit_vec))
      }.otherwise {
        req_ptw(0) := true.B
        when(choose_icache && io.dcache.ptw.vpn.ready) {
          immu_state := search_pte
        }
      }
    }
    is(search_pte) {
      req_ptw(0) := true.B
      when(io.dcache.ptw.pte.valid) {
        when(io.dcache.ptw.pte.bits.access_fault) {
          iaccess_fault := true.B
          immu_state    := search_fault
        }.elsewhen(io.dcache.ptw.pte.bits.page_fault) {
          ipage_fault := true.B
          immu_state  := search_fault
        }.otherwise {
          // The walk found a page-table entry in memory: install it in the TLB.
          val replace_entry = Wire(tlbBundle)
          replace_entry.vpn          := ivpn
          replace_entry.asid         := satp.asid
          replace_entry.flag         := io.dcache.ptw.pte.bits.entry.flag
          replace_entry.ppn          := io.dcache.ptw.pte.bits.entry.ppn
          replace_entry.rmask        := io.dcache.ptw.pte.bits.rmask
          tlbl2(replace_index.value) := replace_entry
          itlb                       := replace_entry
          replace_index.inc()
          immu_state := search_l1
        }
      }
    }
    is(search_fault) {
      when(io.icache.complete_single_request) {
        ipage_fault   := false.B
        iaccess_fault := false.B
        immu_state    := search_l1
      }
    }
  }

  // ---------------------------------------------------
  // ------------ data address translation -------------
  // ---------------------------------------------------
  switch(dmmu_state) {
    is(search_l1) {
      when(io.dcache.en) {
        // PMA and PMP permission checks for the access are implemented in the dcache.
        dpage_fault   := false.B
        daccess_fault := false.B
        when(!dvm_enabled) {
          io.dcache.hit := true.B
        }.elsewhen(dtlbl1_hit) {
          // All permission checks required for a data access happen here.
          // For a load:
          // 0. MXR bit: handled separately for MXR=0 and MXR=1
          // 1. M-mode never gets here because vm_enabled is false
          // 2. S-mode: if the U bit is 1, SUM must be checked
          // 3. U-mode: the U bit must be 1
          io.dcache.hit := false.B // only set to true once the permission checks pass
          switch(io.dcache.access_type) {
            is(AccessType.load) {
              when(mxr) {
                when(!dtlb.flag.r && !dtlb.flag.x) {
                  dpage_fault := true.B
                  dmmu_state  := search_fault
                }.otherwise {
                  dmodeCheck()
                }
              }.otherwise {
                when(!dtlb.flag.r) {
                  dpage_fault := true.B
                  dmmu_state  := search_fault
                }.otherwise {
                  dmodeCheck()
                }
              }
            }
            is(AccessType.store) {
              when(!dtlb.flag.d) {
                dpage_fault := true.B
                dmmu_state  := search_fault
              }.otherwise {
                when(!dtlb.flag.w) {
                  dpage_fault := true.B
                  dmmu_state  := search_fault
                }.otherwise {
                  dmodeCheck()
                }
              }
            }
          }
        }.otherwise {
          dmmu_state := search_l2
        }
      }
    }
    is(search_l2) {
      when(dl2_hit_vec.asUInt.orR) {
        dmmu_state := search_l1
        dtlb       := tlbl2(PriorityEncoder(dl2_hit_vec))
      }.otherwise {
        req_ptw(1) := true.B
        when(!choose_icache && io.dcache.ptw.vpn.ready) {
          dmmu_state := search_pte
        }
      }
    }
    is(search_pte) {
      req_ptw(1) := true.B
      when(io.dcache.ptw.pte.valid) {
        when(io.dcache.ptw.pte.bits.access_fault) {
          daccess_fault := true.B
          dmmu_state    := search_fault
        }.elsewhen(io.dcache.ptw.pte.bits.page_fault) {
          dpage_fault := true.B
          dmmu_state  := search_fault
        }.otherwise {
          // The walk found a page-table entry in memory: install it in the TLB.
          val replace_entry = Wire(tlbBundle)
          replace_entry.vpn          := dvpn
          replace_entry.asid         := satp.asid
          replace_entry.flag         := io.dcache.ptw.pte.bits.entry.flag
          replace_entry.ppn          := io.dcache.ptw.pte.bits.entry.ppn
          replace_entry.rmask        := io.dcache.ptw.pte.bits.rmask
          tlbl2(replace_index.value) := replace_entry
          dtlb                       := replace_entry
          replace_index.inc()
          dmmu_state := search_l1
        }
      }
    }
    is(search_fault) {
      when(io.dcache.complete_single_request) {
        dpage_fault   := false.B
        daccess_fault := false.B
        dmmu_state    := search_l1
      }
    }
  }

  // ---------------------------------------------------
  // ------------------- SFENCE.VMA --------------------
  // ---------------------------------------------------
  // Virtual page number named by the first operand. It is sliced exactly like ivpn/dvpn so
  // that every VPN bit takes part in the comparison; slicing up to bit `vpnLen - 1` would drop
  // the upper VPN bits and leave matching entries valid.
  val src1 = io.sfence_vma.src_info.src1_data(VADDR_WID - 1, pageOffsetLen)
  // ASID named by the second operand.
  val src2 = io.sfence_vma.src_info.src2_data(asidLen - 1, 0)
  when(io.sfence_vma.valid) {
    when(!src1.orR && !src2.orR) {
      // Invalidate every TLB entry.
      itlb.flag.v := false.B
      dtlb.flag.v := false.B
      for (i <- 0 until cpuConfig.tlbEntries) {
        tlbl2(i).flag.v := false.B
      }
    }.elsewhen(!src1.orR && src2.orR) {
      // Invalidate entries whose ASID matches and whose G bit is not set.
      when(itlb.asid === src2 && !itlb.flag.g) {
        itlb.flag.v := false.B
      }
      when(dtlb.asid === src2 && !dtlb.flag.g) {
        dtlb.flag.v := false.B
      }
      for (i <- 0 until cpuConfig.tlbEntries) {
        when(tlbl2(i).asid === src2 && !tlbl2(i).flag.g) {
          tlbl2(i).flag.v := false.B
        }
      }
    }.elsewhen(src1.orR && !src2.orR) {
      // Invalidate entries whose VPN matches.
      when(vpnEq(itlb.rmask, src1, itlb.vpn)) {
        itlb.flag.v := false.B
      }
      when(vpnEq(dtlb.rmask, src1, dtlb.vpn)) {
        dtlb.flag.v := false.B
      }
      for (i <- 0 until cpuConfig.tlbEntries) {
        when(vpnEq(tlbl2(i).rmask, src1, tlbl2(i).vpn)) {
          tlbl2(i).flag.v := false.B
        }
      }
    }.elsewhen(src1.orR && src2.orR) {
      // Invalidate entries whose ASID and VPN both match, except those with G set.
      when(itlb.asid === src2 && vpnEq(itlb.rmask, src1, itlb.vpn) && !itlb.flag.g) {
        itlb.flag.v := false.B
      }
      when(dtlb.asid === src2 && vpnEq(dtlb.rmask, src1, dtlb.vpn) && !dtlb.flag.g) {
        dtlb.flag.v := false.B
      }
      for (i <- 0 until cpuConfig.tlbEntries) {
        when(tlbl2(i).asid === src2 && vpnEq(tlbl2(i).rmask, src1, tlbl2(i).vpn) && !tlbl2(i).flag.g) {
          tlbl2(i).flag.v := false.B
        }
      }
    }
  }

  // Physical tag: taken from the first-level entry when translation is on, otherwise the
  // virtual page number is passed through.
  val imasktag = maskTag(itlb.rmask, itlb.ppn, ivpn)
  val dmasktag = maskTag(dtlb.rmask, dtlb.ppn, dvpn)

  io.icache.uncached := AddressSpace.isMMIO(io.icache.vaddr)
  io.icache.ptag     := Mux(ivm_enabled, imasktag, ivpn)
  io.icache.paddr    := Cat(io.icache.ptag, io.icache.vaddr(pageOffsetLen - 1, 0))

  io.dcache.uncached := AddressSpace.isMMIO(io.dcache.vaddr)
  io.dcache.ptag     := Mux(dvm_enabled, dmasktag, dvpn)
  io.dcache.paddr    := Cat(io.dcache.ptag, io.dcache.vaddr(pageOffsetLen - 1, 0))
}

View File

@ -1,44 +0,0 @@
package cpu.defines
import chisel3._
import chisel3.util._
import cpu.defines.Const._
import cpu.CpuConfig
/** SoC-level statistics outputs: three 32-bit CSR-related values and two flags.
  * NOTE(review): the exact meaning of each field is defined by its producer, which is not
  * visible here — confirm before relying on the names.
  */
class SocStatistic extends Bundle {
  val csr_count = Output(UInt(32.W))
  val csr_random = Output(UInt(32.W))
  val csr_cause = Output(UInt(32.W))
  val int = Output(Bool()) // presumably "interrupt taken" — confirm
  val commit = Output(Bool()) // presumably "instruction committed" — confirm
}
/** Branch-predictor statistics: two 32-bit outputs.
  * NOTE(review): presumably the number of branches seen and the number predicted correctly —
  * confirm against the producer.
  */
class BranchPredictorUnitStatistic extends Bundle {
  val branch = Output(UInt(32.W))
  val success = Output(UInt(32.W))
}
/** CPU statistics: groups the SoC-level and branch-predictor statistics bundles. */
class CPUStatistic extends Bundle {
  val soc = new SocStatistic()
  val bpu = new BranchPredictorUnitStatistic()
}
/** Top-level statistics bundle: groups the CPU and cache statistics. */
class GlobalStatistic extends Bundle {
  val cpu = new CPUStatistic()
  val cache = new CacheStatistic()
}
/** Instruction-cache statistics: two 32-bit outputs.
  * NOTE(review): presumably request and hit counts — confirm against the producer.
  */
class ICacheStatistic extends Bundle {
  val request = Output(UInt(32.W))
  val hit = Output(UInt(32.W))
}
/** Data-cache statistics: two 32-bit outputs.
  * NOTE(review): presumably request and hit counts — confirm against the producer.
  */
class DCacheStatistic extends Bundle {
  val request = Output(UInt(32.W))
  val hit = Output(UInt(32.W))
}
/** Cache statistics: groups the instruction-cache and data-cache statistics bundles. */
class CacheStatistic extends Bundle {
  val icache = new ICacheStatistic()
  val dcache = new DCacheStatistic()
}

View File

@ -1,102 +0,0 @@
package cpu.defines
import chisel3._
import chisel3.util._
import cpu.defines.Const._
import cpu.CacheConfig
import cpu.CpuConfig
/** Widths, bundle factories and small helpers shared by the TLB and page-table logic.
  *
  * Virtual page numbers are three 9-bit fields; physical page numbers are sized from
  * `PADDR_WID`. The trailing numbers in the comments are the values given in the original
  * notes for a 32-bit physical address.
  */
trait HasTlbConst extends CoreParameter {
  val PAddrBits = PADDR_WID // 32
  val level = 3 // NOTE(review): presumably the number of page-table levels — confirm

  val pageOffsetLen = 12 // pages are 4 KiB, so the in-page offset is 12 bits

  // Physical page number fields; ppn2 takes whatever is left of the physical address.
  val ppn0Len = 9
  val ppn1Len = 9
  val ppn2Len = PAddrBits - pageOffsetLen - ppn0Len - ppn1Len // 2
  val ppnLen = ppn2Len + ppn1Len + ppn0Len // 20

  // Virtual page number fields.
  val vpn2Len = 9
  val vpn1Len = 9
  val vpn0Len = 9
  val vpnLen = vpn2Len + vpn1Len + vpn0Len // 27

  // The mask covers the two lower page-number fields.
  val maskLen = ppn1Len + ppn0Len // 18

  val satpLen = XLEN
  val satpModeLen = 4
  val asidLen = 16
  val flagLen = 8
  val ptEntryLen = XLEN

  // Widths of the unused fields in satp and in a page-table entry.
  val satpResLen = XLEN - ppnLen - satpModeLen - asidLen
  val pteResLen = XLEN - ppnLen - 2 - flagLen

  val cacheTagLen = PADDR_WID - pageOffsetLen // 32 - 12 = 20
  require(ppnLen == cacheTagLen)

  /** True when `vpn` and `tlbvpn` agree on every bit selected by the mask; the top `vpn2Len`
    * bits are always compared.
    */
  def vpnEq(mask: UInt, vpn: UInt, tlbvpn: UInt) = {
    val compared = Cat(Fill(vpn2Len, true.B), mask)
    !((vpn ^ tlbvpn) & compared).orR
  }

  /** Builds a tag by taking the masked bits (and the top `ppn2Len` bits) from `ppn` and the
    * remaining bits from `vpn`.
    */
  def maskTag(mask: UInt, ppn: UInt, vpn: UInt) = {
    val fromPpn = Cat(Fill(ppn2Len, true.B), mask)
    val translated = ppn & fromPpn
    val passthrough = vpn & ~fromPpn
    translated | passthrough
  }

  /** Virtual page number split into its three fields, most significant first. */
  def vpnBundle = new Bundle {
    val vpn2 = UInt(vpn2Len.W)
    val vpn1 = UInt(vpn1Len.W)
    val vpn0 = UInt(vpn0Len.W)
  }

  /** Physical page number split into its three fields, most significant first. */
  def ppnBundle = new Bundle {
    val ppn2 = UInt(ppn2Len.W)
    val ppn1 = UInt(ppn1Len.W)
    val ppn0 = UInt(ppn0Len.W)
  }

  /** Concatenates `ppn`, `vpnn` and three zero bits (an 8-byte-aligned address). */
  def paddrApply(ppn: UInt, vpnn: UInt): UInt =
    Cat(ppn, vpnn, 0.U(3.W))

  /** The eight page-table-entry flag bits, most significant first. */
  def flagBundle = new Bundle {
    val d = Bool()
    val a = Bool()
    val g = Bool()
    val u = Bool()
    val x = Bool()
    val w = Bool()
    val r = Bool()
    val v = Bool()
  }

  /** Layout of a page-table entry: reserved bits, PPN, RSW and the flag bits. */
  def pteBundle = new Bundle {
    val reserved = UInt(pteResLen.W)
    val ppn = UInt(ppnLen.W)
    val rsw = UInt(2.W)
    val flag = flagBundle
  }

  /** Layout of the satp register: mode, ASID, unused bits and root PPN. */
  def satpBundle = new Bundle {
    val mode = UInt(satpModeLen.W)
    val asid = UInt(asidLen.W)
    val res = UInt(satpResLen.W)
    val ppn = UInt(ppnLen.W)
  }

  /** Layout of one TLB entry. */
  def tlbBundle = new Bundle {
    val vpn = UInt(vpnLen.W)
    val asid = UInt(asidLen.W)
    val flag = flagBundle
    val ppn = UInt(ppnLen.W)
    val rmask = UInt(maskLen.W)
  }
}

View File

@ -1,84 +0,0 @@
package cpu.pipeline.decode
import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
import cpu.defines.Instructions._
import cpu.CpuConfig
/** Decides whether the second decoded instruction (`inst1`) may issue together with the first.
  *
  * With a two-wide decoder, `inst1` is allowed to go only when instruction 0 goes, the
  * instruction buffer holds at least two instructions, there is no structural or
  * read-after-write conflict, and neither instruction forces single issue. With any other
  * decoder width `inst1` is never allowed to go.
  *
  * @param cpuConfig
  *   implicit configuration supplying `decoderNum` and `commitNum`
  */
class Issue(implicit val cpuConfig: CpuConfig) extends Module with HasCSRConst {
  val io = IO(new Bundle {
    // Inputs
    val allow_to_go = Input(Bool())
    val instFifo = Input(new Bundle {
      val empty        = Bool()
      val almost_empty = Bool()
    })
    val decodeInst = Input(Vec(cpuConfig.decoderNum, new Info()))
    val execute    = Input(Vec(cpuConfig.commitNum, new MemRead()))
    // Outputs
    val inst1 = Output(new Bundle {
      val allow_to_go = Bool()
    })
  })

  if (cpuConfig.decoderNum == 2) {
    val inst = io.decodeInst

    // Whether the instruction buffer holds fewer than two instructions.
    val instFifo_invalid = io.instFifo.empty || io.instFifo.almost_empty

    // Structural conflicts: each of these units accepts at most one instruction per cycle.
    val lsu_conflict    = inst.map(_.fusel === FuType.lsu).reduce(_ && _) // load/store unit
    val mdu_conflict    = inst.map(_.fusel === FuType.mdu).reduce(_ && _) // multiply/divide unit
    val csr_conflict    = inst.map(_.fusel === FuType.csr).reduce(_ && _) // CSR unit
    val struct_conflict = lsu_conflict || mdu_conflict || csr_conflict

    // Read-after-write conflicts.
    // inst1 needs a source operand produced by a load that is still in the execute stage.
    // Every execute lane is checked, so the logic follows the actual size of io.execute.
    val load_stall = io.execute
      .map(exe =>
        exe.is_load && exe.reg_waddr.orR &&
          (inst(1).src1_ren && inst(1).src1_raddr === exe.reg_waddr ||
            inst(1).src2_ren && inst(1).src2_raddr === exe.reg_waddr)
      )
      .reduce(_ || _)
    // A source operand of inst1 is the destination of inst0.
    val raw_reg =
      inst(0).reg_wen && inst(0).reg_waddr.orR &&
        (inst(0).reg_waddr === inst(1).src1_raddr && inst(1).src1_ren ||
          inst(0).reg_waddr === inst(1).src2_raddr && inst(1).src2_ren)
    val data_conflict = raw_reg || load_stall

    // Branch-unit instructions may only execute as inst0.
    val is_bru = inst.map(_.fusel === FuType.bru).reduce(_ || _)
    // MOU instructions flush the pipeline.
    val is_mou = inst.map(_.fusel === FuType.mou).reduce(_ || _)
    // A write to satp flushes the pipeline. Iterate over the decoded instructions themselves:
    // `inst` has decoderNum entries, so indexing it by commitNum could go out of range or
    // skip an instruction.
    val write_satp = inst
      .map(i => i.fusel === FuType.csr && CSROpType.isCSROp(i.op) && i.inst(31, 20) === Satp.U)
      .reduce(_ || _)
    // uret, sret and mret flush the pipeline.
    val ret = HasRet(inst(0)) || HasRet(inst(1))
    // CSR-related instructions that flush the pipeline.
    val is_some_csr_inst = write_satp || ret

    // Any of these forces single issue.
    val single_issue = is_mou || is_bru || is_some_csr_inst

    // Whether instruction 1 is allowed to execute.
    io.inst1.allow_to_go :=
      io.allow_to_go && // instruction 0 is allowed to execute
        !instFifo_invalid && // the instruction buffer holds at least two instructions
        !struct_conflict && // no structural conflict
        !data_conflict && // no read-after-write conflict
        !single_issue // not a single-issue instruction
  } else {
    io.inst1.allow_to_go := false.B
  }
}

View File

@ -1,48 +0,0 @@
package cpu.pipeline.decode
import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
import cpu.CpuConfig
/** Decode-stage handling of `jal` / `jalr`.
  *
  * `jump` is raised for a valid `jal`, or for a valid `jalr` whose base register is not
  * waiting on a result still in flight. `jump_register` flags exactly that pending case.
  * `jump_target` is the sum of the two source operands, with bit 0 cleared for anything that
  * is not `jal`.
  *
  * @param cpuConfig
  *   implicit configuration; `decoderNum` selects how many forwarding lanes are inspected
  */
class JumpCtrl(implicit val cpuConfig: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val in = Input(new Bundle {
      val pc       = UInt(XLEN.W)
      val info     = new Info()
      val src_info = new SrcInfo()
      val forward  = Vec(cpuConfig.commitNum, new DataForwardToDecodeUnit())
    })
    val out = Output(new Bundle {
      val jump_register = Bool()
      val jump          = Bool()
      val jump_target   = UInt(XLEN.W)
    })
  })

  val valid = io.in.info.valid
  val op    = io.in.info.op
  val fusel = io.in.info.fusel

  val jump_inst          = op === BRUOpType.jal && fusel === FuType.bru
  val jump_register_inst = op === BRUOpType.jalr && fusel === FuType.bru

  // True when the given forwarding lane is about to write the jalr base register from either
  // the execute or the memory stage.
  private def baseRegisterPending(lane: Int): Bool = {
    val fwd  = io.in.forward(lane)
    val base = io.in.info.src1_raddr
    (fwd.exe.wen && base === fwd.exe.waddr) || (fwd.mem.wen && base === fwd.mem.waddr)
  }

  // Two lanes are inspected with a two-wide decoder, otherwise only lane 0.
  private val lanes = if (cpuConfig.decoderNum == 2) Seq(0, 1) else Seq(0)

  io.out.jump_register :=
    jump_register_inst && io.in.info.src1_raddr.orR && lanes.map(baseRegisterPending).reduce(_ || _)

  io.out.jump := (jump_inst || jump_register_inst && !io.out.jump_register) && valid

  // Target is src1 + src2; bit 0 is cleared unless the instruction is jal.
  private val operandSum = io.in.src_info.src1_data + io.in.src_info.src2_data
  io.out.jump_target := Mux(jump_inst, operandSum, operandSum & ~1.U(XLEN.W))
}

View File

@ -1,49 +0,0 @@
package cpu.pipeline.execute
import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
/** Execute-stage resolution of conditional branches.
  *
  * Evaluates the branch condition from the two source operands, compares the outcome with the
  * incoming prediction, and selects the redirect target:
  *  - mispredicted and taken      -> `branch_target`
  *  - mispredicted and not taken  -> `pc + 4`
  *  - `jump_regiser` asserted     -> `(src1 + src2)` with bit 0 cleared
  *
  * The target is built with `Mux1H`, so the three select conditions are expected to be mutually
  * exclusive.
  */
class BranchCtrl extends Module {
  val io = IO(new Bundle {
    val in = new Bundle {
      val pc            = Input(UInt(XLEN.W))
      val info          = Input(new Info())
      val src_info      = Input(new SrcInfo())
      val pred_branch   = Input(Bool())
      val jump_regiser  = Input(Bool())
      val branch_target = Input(UInt(XLEN.W))
    }
    val out = new Bundle {
      val branch    = Output(Bool())
      val pred_fail = Output(Bool())
      val target    = Output(UInt(XLEN.W))
    }
  })

  val op   = io.in.info.op
  val src1 = io.in.src_info.src1_data
  val src2 = io.in.src_info.src2_data

  // Only a valid BRU instruction of branch type can produce a taken result.
  val valid =
    io.in.info.valid && io.in.info.fusel === FuType.bru && BRUOpType.isBranch(op)

  // Shared adder: computes src1 - src2 (as src1 + ~src2 + 1) unless the op is an add.
  val is_sub        = !BRUOpType.isAdd(op)
  val src2_operand  = src2 ^ Fill(XLEN, is_sub)
  val adder         = (src1 +& src2_operand) + is_sub
  val xor           = src1 ^ src2
  val sltu          = !adder(XLEN)          // no carry out => src1 < src2 (unsigned)
  val slt           = xor(XLEN - 1) ^ sltu  // differing sign bits flip the unsigned result

  val condition = LookupTree(
    BRUOpType.getBranchType(op),
    List(
      BRUOpType.getBranchType(BRUOpType.beq)  -> !xor.orR,
      BRUOpType.getBranchType(BRUOpType.blt)  -> slt,
      BRUOpType.getBranchType(BRUOpType.bltu) -> sltu
    )
  )
  val taken = (condition ^ BRUOpType.isBranchInvert(op)) & valid
  val mispredicted = io.in.pred_branch =/= taken

  io.out.branch    := taken
  io.out.pred_fail := mispredicted

  val jalr_target = (src1 + src2) & ~1.U(XLEN.W)
  io.out.target := Mux1H(
    Seq(
      (mispredicted && taken)  -> io.in.branch_target,
      (mispredicted && !taken) -> (io.in.pc + 4.U),
      (io.in.jump_regiser)     -> jalr_target
    )
  )
}

View File

@ -1,150 +0,0 @@
package cpu.pipeline.fetch
import chisel3._
import chisel3.util._
import cpu.defines.Const._
import cpu._
import cpu.pipeline.decode.Src12Read
import cpu.defines.BRUOpType
import cpu.defines.FuOpType
import cpu.defines.FuType
import cpu.defines.SignedExtend
import cpu.pipeline.decode.DecoderBranchPredictorUnit
import pipeline.decode.{DecoderBranchPredictorUnit, Src12Read}
/** Branch-resolution feedback bundle carrying `pc`, `update_pht_index`, `branch_inst` and `branch`.
  *
  * Every field is declared `Output` here; `BranchPredictorIO` instantiates the bundle `Flipped`.
  * The predictors in this file use it as follows: `branch_inst` gates the table update, `pc`
  * selects the BHT entry, `branch` is the bit shifted into the history and used to step the PHT
  * counter, and `update_pht_index` selects the PHT entry in `AdaptiveTwoLevelPredictor`.
  */
class ExecuteUnitBranchPredictor extends Bundle {
// Configuration object supplying `phtDepth`, the bit width of `update_pht_index`.
val bpuConfig = new BranchPredictorConfig()
val pc = Output(UInt(XLEN.W))
val update_pht_index = Output(UInt(bpuConfig.phtDepth.W))
val branch_inst = Output(Bool())
val branch = Output(Bool())
}
/** IO shared by `BranchPredictorUnit` and the predictor implementations in this file.
  *
  *  - `decode`: flipped `DecoderBranchPredictorUnit` (declared elsewhere).
  *  - `instBuffer`: one `pc` input and one `pht_index` output per fetch slot
  *    (`cpuConfig.instFetchNum` slots).
  *  - `execute`: flipped `ExecuteUnitBranchPredictor`, i.e. its fields arrive as inputs.
  */
class BranchPredictorIO(implicit cpuConfig: CpuConfig) extends Bundle {
// Configuration object supplying `phtDepth`, the bit width of each `pht_index` element.
val bpuConfig = new BranchPredictorConfig()
val decode = Flipped(new DecoderBranchPredictorUnit())
val instBuffer = new Bundle {
val pc = Input(Vec(cpuConfig.instFetchNum, UInt(XLEN.W)))
val pht_index = Output(Vec(cpuConfig.instFetchNum, UInt(bpuConfig.phtDepth.W)))
}
val execute = Flipped(new ExecuteUnitBranchPredictor())
}
/** Wrapper that instantiates the predictor selected by `cpuConfig.branchPredictor` and connects
  * it to this module's IO.
  *
  * Supported values are `"adaptive"` ([[AdaptiveTwoLevelPredictor]]) and `"global"`
  * ([[GlobalBranchPredictor]]).
  *
  * @throws IllegalArgumentException
  *   at elaboration time if the configured predictor name is not one of the supported values.
  *   Without this check no predictor would be instantiated and `io` would stay undriven.
  */
class BranchPredictorUnit(implicit cpuConfig: CpuConfig) extends Module {
  val io = IO(new BranchPredictorIO())

  if (cpuConfig.branchPredictor == "adaptive") {
    val adaptive_predictor = Module(new AdaptiveTwoLevelPredictor())
    io <> adaptive_predictor.io
  } else if (cpuConfig.branchPredictor == "global") {
    val global_predictor = Module(new GlobalBranchPredictor())
    io <> global_predictor.io
  } else {
    throw new IllegalArgumentException(
      s"Unsupported branchPredictor '${cpuConfig.branchPredictor}': expected \"adaptive\" or \"global\""
    )
  }
}
/** Two-level branch predictor that looks up its own history at both prediction and update time.
  *
  * A branch history table (BHT), indexed by PC bits `[1 + BHT_DEPTH : 2]`, stores the last
  * `PHT_DEPTH` outcomes of the branches mapping to that entry. That history indexes a pattern
  * history table (PHT) of 2-bit saturating counters, all initialised to `strongly_taken`.
  *
  * The PHT index is re-read from the BHT when the branch resolves, so the PHT-index outputs of
  * the shared [[BranchPredictorIO]] are not needed by this predictor and are tied to zero.
  *
  * @param GHR_DEPTH
  *   Number of history records; currently unused by this implementation.
  * @param PC_HASH_WID
  *   Number of PC bits to hash; currently unused by this implementation.
  * @param PHT_DEPTH
  *   History length per BHT entry, and therefore the PHT index width (`1 << PHT_DEPTH` counters).
  * @param BHT_DEPTH
  *   Number of PC bits used to index the BHT (`1 << BHT_DEPTH` entries).
  */
class GlobalBranchPredictor(
  GHR_DEPTH:   Int = 4,
  PC_HASH_WID: Int = 4,
  PHT_DEPTH:   Int = 6,
  BHT_DEPTH:   Int = 4
)(
  implicit
  cpuConfig: CpuConfig)
    extends Module {
  val io = IO(new BranchPredictorIO())

  val strongly_not_taken :: weakly_not_taken :: weakly_taken :: strongly_taken :: Nil = Enum(4)

  val imm = io.decode.info.imm

  io.decode.branch_inst := io.decode.info.valid &&
    FuType.bru === io.decode.info.fusel && BRUOpType.isBranch(io.decode.info.op)
  io.decode.target := io.decode.pc + imm

  // Per-PC history (BHT) selecting a 2-bit counter (PHT).
  val bht = RegInit(VecInit(Seq.fill(1 << BHT_DEPTH)(0.U(PHT_DEPTH.W))))
  val pht = RegInit(VecInit(Seq.fill(1 << PHT_DEPTH)(strongly_taken)))

  val bht_index = io.decode.pc(1 + BHT_DEPTH, 2)
  val pht_index = bht(bht_index)

  io.decode.branch :=
    io.decode.branch_inst && (pht(pht_index) === weakly_taken || pht(pht_index) === strongly_taken)

  // Fix: these outputs of the shared IO were previously left undriven by this predictor.
  // Their values are not consumed here because the update path below recomputes the index.
  io.instBuffer.pht_index.foreach(_ := 0.U)
  io.decode.update_pht_index := 0.U

  val update_bht_index = io.execute.pc(1 + BHT_DEPTH, 2)
  val update_pht_index = bht(update_bht_index)

  when(io.execute.branch_inst) {
    // Shift the resolved outcome into the history of the resolving branch.
    bht(update_bht_index) := Cat(bht(update_bht_index)(PHT_DEPTH - 2, 0), io.execute.branch)
    // Step the 2-bit counter one state towards the resolved outcome.
    switch(pht(update_pht_index)) {
      is(strongly_not_taken) {
        pht(update_pht_index) := Mux(io.execute.branch, weakly_not_taken, strongly_not_taken)
      }
      is(weakly_not_taken) {
        pht(update_pht_index) := Mux(io.execute.branch, weakly_taken, strongly_not_taken)
      }
      is(weakly_taken) {
        pht(update_pht_index) := Mux(io.execute.branch, strongly_taken, weakly_not_taken)
      }
      is(strongly_taken) {
        pht(update_pht_index) := Mux(io.execute.branch, strongly_taken, weakly_taken)
      }
    }
  }
}
/** Two-level adaptive branch predictor whose PHT index travels with the instruction.
  *
  * The BHT is read per fetch slot (`io.instBuffer`) and at decode (`io.decode.update_pht_index`).
  * The prediction uses the index supplied on `io.decode.pht_index`, and the update uses the index
  * returned on `io.execute.update_pht_index` rather than re-reading the BHT.
  *
  * Table sizes come from `BranchPredictorConfig` (`phtDepth`, `bhtDepth`). PHT entries are 2-bit
  * saturating counters initialised to `strongly_taken`.
  */
class AdaptiveTwoLevelPredictor()(implicit cpuConfig: CpuConfig) extends Module {
  val bpuConfig = new BranchPredictorConfig()
  val PHT_DEPTH = bpuConfig.phtDepth
  val BHT_DEPTH = bpuConfig.bhtDepth

  val io = IO(new BranchPredictorIO())

  // Enum(4) numbers the states 0..3 in declaration order, which the counter update relies on.
  val strongly_not_taken :: weakly_not_taken :: weakly_taken :: strongly_taken :: Nil = Enum(4)

  val bht = RegInit(VecInit(Seq.fill(1 << BHT_DEPTH)(0.U(PHT_DEPTH.W))))
  val pht = RegInit(VecInit(Seq.fill(1 << PHT_DEPTH)(strongly_taken)))

  // ---- decode side -------------------------------------------------------------------------
  val imm = io.decode.info.imm
  io.decode.branch_inst := io.decode.info.valid &&
    FuType.bru === io.decode.info.fusel && BRUOpType.isBranch(io.decode.info.op)
  io.decode.target := io.decode.pc + imm

  val pht_index     = io.decode.pht_index
  val predict_state = pht(pht_index)
  io.decode.branch :=
    io.decode.branch_inst && (predict_state === weakly_taken || predict_state === strongly_taken)
  io.decode.update_pht_index := bht(io.decode.pc(1 + BHT_DEPTH, 2))

  // ---- fetch side: one BHT read per fetch slot ---------------------------------------------
  io.instBuffer.pht_index.zip(io.instBuffer.pc).foreach {
    case (index_out, pc) => index_out := bht(pc(1 + BHT_DEPTH, 2))
  }

  // ---- execute side: train both tables on a resolved branch --------------------------------
  val update_bht_index = io.execute.pc(1 + BHT_DEPTH, 2)
  val update_pht_index = io.execute.update_pht_index
  val resolved_taken   = io.execute.branch

  when(io.execute.branch_inst) {
    bht(update_bht_index) := Cat(bht(update_bht_index)(PHT_DEPTH - 2, 0), resolved_taken)

    // Saturating step: +1 towards strongly_taken when taken, -1 towards strongly_not_taken otherwise.
    val current = pht(update_pht_index)
    pht(update_pht_index) := Mux(
      resolved_taken,
      Mux(current === strongly_taken, strongly_taken, current + 1.U),
      Mux(current === strongly_not_taken, strongly_not_taken, current - 1.U)
    )
  }
}

View File

@ -1,102 +0,0 @@
package cpu.pipeline.fetch
import chisel3._
import chisel3.util._
import cpu.defines.Const._
import cpu.{BranchPredictorConfig, CpuConfig}
import cpu.pipeline.decode.DecodeUnitInstFifo
/** One entry of the instruction FIFO: `InstFifo` stores values of this type in its buffer and
  * presents them on its decoder-facing instruction outputs.
  *
  * Fields: the instruction word `inst`, the `pht_index` that accompanies it, three exception
  * flags, and the `pc`.
  */
class IfIdData extends Bundle {
// Configuration object supplying `phtDepth`, the bit width of `pht_index`.
val bpuConfig = new BranchPredictorConfig()
val inst = UInt(XLEN.W)
val pht_index = UInt(bpuConfig.phtDepth.W)
// NOTE(review): presumably raised by instruction fetch for this entry; confirm against the producer.
val addr_misaligned = Bool()
val access_fault = Bool()
val page_fault = Bool()
val pc = UInt(XLEN.W)
}
/** Instruction FIFO between fetch and decode.
  *
  * Up to `cpuConfig.instFetchNum` entries can be written per cycle (`io.wen` / `io.write`) and
  * up to two can be read (`io.decoderUint.inst(0)` / `inst(1)`). The number consumed follows
  * `io.decoderUint.allow_to_go`. `io.do_flush` resets both pointers and the occupancy counter.
  *
  * `io.full` is raised once fewer than `instFetchNum` free slots remain. Writes are not blocked
  * inside this module when `full` is set.
  */
class InstFifo(implicit val cpuConfig: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val do_flush    = Input(Bool())
    val wen         = Input(Vec(cpuConfig.instFetchNum, Bool()))
    val write       = Input(Vec(cpuConfig.instFetchNum, new IfIdData()))
    val full        = Output(Bool())
    val decoderUint = Flipped(new DecodeUnitInstFifo())
  })

  val ptr_width = log2Ceil(cpuConfig.instFifoDepth)

  // Storage, read/write pointers and occupancy counter.
  val buffer  = RegInit(VecInit(Seq.fill(cpuConfig.instFifoDepth)(0.U.asTypeOf(new IfIdData()))))
  val enq_ptr = RegInit(0.U(ptr_width.W))
  val deq_ptr = RegInit(0.U(ptr_width.W))
  val count   = RegInit(0.U(ptr_width.W))

  // Status flags derived from the occupancy.
  val full         = count >= (cpuConfig.instFifoDepth - cpuConfig.instFetchNum).U
  val empty        = count === 0.U
  val almost_empty = count === 1.U

  io.full                          := full
  io.decoderUint.info.empty        := empty
  io.decoderUint.info.almost_empty := almost_empty

  // ---- dequeue -----------------------------------------------------------------------------
  // Slots that hold no instruction are presented as an all-zero entry.
  val bubble = 0.U.asTypeOf(new IfIdData())
  io.decoderUint.inst(0) := Mux(empty, bubble, buffer(deq_ptr))
  io.decoderUint.inst(1) := Mux(empty || almost_empty, bubble, buffer(deq_ptr + 1.U))

  // Priority: nothing when empty, two when slot 1 may go, one when only slot 0 may go.
  val deq_num = Mux(
    empty,
    0.U,
    Mux(io.decoderUint.allow_to_go(1), 2.U, Mux(io.decoderUint.allow_to_go(0), 1.U, 0.U))
  )
  deq_ptr := Mux(io.do_flush, 0.U, deq_ptr + deq_num)

  // ---- enqueue -----------------------------------------------------------------------------
  io.wen.zip(io.write).zipWithIndex.foreach {
    case ((write_en, entry), i) =>
      when(write_en) {
        buffer(enq_ptr + i.U) := entry
      }
  }

  // The highest-numbered asserted write enable determines how far the write pointer advances.
  val enq_num = Wire(UInt(log2Ceil(cpuConfig.instFetchNum + 1).W))
  enq_num := io.wen.zipWithIndex.foldLeft(0.U) {
    case (so_far, (write_en, i)) => Mux(write_en, (i + 1).U, so_far)
  }
  enq_ptr := Mux(io.do_flush, 0.U, enq_ptr + enq_num)

  // Adding the depth keeps the intermediate value non-negative before deq_num is subtracted.
  count := Mux(io.do_flush, 0.U, count + enq_num + cpuConfig.instFifoDepth.U - deq_num)
}

View File

@ -1,210 +0,0 @@
package cpu.pipeline.memory
import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
import cpu.CpuConfig
import chisel3.util.experimental.BoringUtils
/** Request/response bundle between the load-store logic and the data memory.
  *
  * `out` is the request driven by `LsExecute` (enable, read length, write enable, byte strobes,
  * address, write data). `in` is the response read back (fault flags, `ready`, read data).
  */
class Lsu_DataMemory extends Bundle {
// Response side: read by LsExecute.
val in = Input(new Bundle {
val access_fault = Bool()
val page_fault = Bool()
val ready = Bool()
val rdata = UInt(XLEN.W)
})
// Request side: driven by LsExecute.
val out = Output(new Bundle {
val en = Bool()
val rlen = UInt(AXI_LEN_WID.W)
val wen = Bool()
val wstrb = UInt(AXI_STRB_WID.W)
val addr = UInt(XLEN.W)
val wdata = UInt(XLEN.W)
})
}
/** Bundle connecting `Lsu` to the memory unit around it.
  *
  * `in` carries the request: enable, decoded instruction info, source operands, incoming
  * exception state, the current LR reservation (`lr`, `lr_addr`) and `allow_to_go`.
  * `out` carries the result: `ready`, read data, updated exception state, and the LR reservation
  * write port (`lr_wen`, `lr_wbit`, `lr_waddr`).
  */
class Lsu_MemoryUnit extends Bundle {
val in = Input(new Bundle {
val mem_en = Bool()
val info = new Info()
val src_info = new SrcInfo()
val ex = new ExceptionInfo()
val lr = Bool()
val lr_addr = UInt(XLEN.W)
val allow_to_go = Bool()
})
val out = Output(new Bundle {
val ready = Bool()
val rdata = UInt(XLEN.W)
val ex = new ExceptionInfo()
// Indicates to the dcache that one request has completed.
val complete_single_request = Bool()
val lr_wen = Bool()
val lr_wbit = Bool()
val lr_waddr = UInt(XLEN.W)
})
}
/** Load-store unit: sequences plain loads/stores, LR/SC and AMO operations onto `LsExecute`.
  *
  * State machine:
  *  - `s_idle`:  plain loads/stores and LR are issued directly. For an AMO the old value is
  *               loaded first. A valid SC moves to `s_sc`; an invalid SC completes immediately.
  *  - `s_sc`:    performs the store of a valid SC.
  *  - `s_amo_a`: one cycle in which the `AtomAlu` result is latched into `atom_wdata`.
  *  - `s_amo_s`: stores the AMO result back.
  *
  * Any misalignment, access fault or page fault reported by `LsExecute` forces the state back to
  * `s_idle` and raises `ready`, so the exception can be taken.
  *
  * `rdata` returns `sc_invalid` for SC, the originally loaded value for AMO, and the `LsExecute`
  * read data otherwise.
  */
class Lsu(implicit val cpuConfig: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val memoryUnit = new Lsu_MemoryUnit()
    val dataMemory = new Lsu_DataMemory()
  })

  val atomAlu   = Module(new AtomAlu()).io
  val lsExecute = Module(new LsExecute()).io

  val valid = io.memoryUnit.in.mem_en
  val src1  = io.memoryUnit.in.src_info.src1_data
  val src2  = io.memoryUnit.in.src_info.src2_data
  val imm   = io.memoryUnit.in.info.imm
  val func  = io.memoryUnit.in.info.op
  val inst  = io.memoryUnit.in.info.inst

  // Request classification.
  val store_req = valid & LSUOpType.isStore(func)
  val load_req  = valid & LSUOpType.isLoad(func)
  val atom_req  = valid & LSUOpType.isAtom(func)
  val amo_req   = valid & LSUOpType.isAMO(func)
  val lr_req    = valid & LSUOpType.isLR(func)
  val sc_req    = valid & LSUOpType.isSC(func)

  val funct3 = inst(14, 12)
  // Selects the doubleword (ld/sd) over the word (lw/sw) access for atomic operations.
  val atom_d = funct3(0)

  // Atom LR/SC control bits: reservation flag and address, plus their write port.
  val lr      = WireInit(Bool(), false.B)
  val lr_addr = WireInit(UInt(XLEN.W), DontCare)
  io.memoryUnit.out.lr_wen   := io.memoryUnit.out.ready && (lr_req || sc_req)
  io.memoryUnit.out.lr_wbit  := lr_req
  io.memoryUnit.out.lr_waddr := src1
  lr                         := io.memoryUnit.in.lr
  lr_addr                    := io.memoryUnit.in.lr_addr

  val s_idle :: s_sc :: s_amo_a :: s_amo_s :: Nil = Enum(4)

  val state      = RegInit(s_idle)
  val atom_wdata = Reg(UInt(XLEN.W)) // loaded value, later overwritten with the AMO result
  val atom_rdata = Reg(UInt(XLEN.W)) // loaded value returned to the pipeline for AMO
  atomAlu.in.rdata := atom_wdata
  atomAlu.in.src2  := src2
  atomAlu.in.info  := io.memoryUnit.in.info

  // An SC fails when there is no reservation or the reserved address differs.
  val sc_invalid = (src1 =/= lr_addr || !lr) && sc_req

  // Defaults; the state machine below overrides them.
  lsExecute.in.info       := DontCare
  lsExecute.in.mem_addr   := DontCare
  lsExecute.in.mem_en     := false.B
  lsExecute.in.wdata      := DontCare
  io.memoryUnit.out.ready := false.B

  val allow_to_go             = io.memoryUnit.in.allow_to_go
  val complete_single_request = Wire(Bool())
  // This signal only takes effect for AMO operations.
  complete_single_request                   := false.B
  io.memoryUnit.out.complete_single_request := complete_single_request

  switch(state) {
    is(s_idle) { // 0
      lsExecute.in.mem_en     := io.memoryUnit.in.mem_en && !atom_req
      lsExecute.in.mem_addr   := src1 + imm
      lsExecute.in.info.op    := func
      lsExecute.in.wdata      := src2
      io.memoryUnit.out.ready := lsExecute.out.ready || sc_invalid
      when(amo_req) {
        // First half of an AMO: load the current memory value.
        lsExecute.in.mem_en     := true.B
        lsExecute.in.mem_addr   := src1
        lsExecute.in.info.op    := Mux(atom_d, LSUOpType.ld, LSUOpType.lw)
        lsExecute.in.wdata      := DontCare
        io.memoryUnit.out.ready := false.B
        when(lsExecute.out.ready) {
          state := s_amo_a
          // Tell the dcache that one access has completed so the next access can start.
          complete_single_request := true.B
        }
        atom_wdata := lsExecute.out.rdata
        atom_rdata := lsExecute.out.rdata
      }
      when(lr_req) {
        lsExecute.in.mem_en     := true.B
        lsExecute.in.mem_addr   := src1
        lsExecute.in.info.op    := Mux(atom_d, LSUOpType.ld, LSUOpType.lw)
        lsExecute.in.wdata      := DontCare
        io.memoryUnit.out.ready := lsExecute.out.ready
      }
      when(sc_req) { state := Mux(sc_invalid, s_idle, s_sc) }
    }
    is(s_sc) { // 1
      lsExecute.in.mem_en     := true.B
      lsExecute.in.mem_addr   := src1
      lsExecute.in.info.op    := Mux(atom_d, LSUOpType.sd, LSUOpType.sw)
      lsExecute.in.wdata      := src2
      io.memoryUnit.out.ready := lsExecute.out.ready
      when(allow_to_go) {
        state := s_idle
      }
    }
    is(s_amo_a) { // 2
      lsExecute.in.mem_en     := false.B
      lsExecute.in.mem_addr   := DontCare
      lsExecute.in.info.op    := DontCare
      lsExecute.in.wdata      := DontCare
      io.memoryUnit.out.ready := false.B
      state                   := s_amo_s
      atom_wdata              := atomAlu.out.result
    }
    is(s_amo_s) { // 3
      lsExecute.in.mem_en     := true.B
      lsExecute.in.mem_addr   := src1
      lsExecute.in.info.op    := Mux(atom_d, LSUOpType.sd, LSUOpType.sw)
      lsExecute.in.wdata      := atom_wdata
      io.memoryUnit.out.ready := lsExecute.out.ready
      when(allow_to_go) {
        state := s_idle
      }
    }
  }

  // Placed after the switch so that it overrides the state machine on any reported fault.
  when(
    lsExecute.out.addr_misaligned ||
      lsExecute.out.access_fault ||
      lsExecute.out.page_fault
  ) {
    state                   := s_idle
    io.memoryUnit.out.ready := true.B
    // On an exception, progress is controlled by ctrl's allow_to_go instead.
    complete_single_request := false.B
  }

  io.dataMemory <> lsExecute.dataMemory

  // Exception reporting: pass through the incoming state, then set the load/store causes.
  val is_load_like  = load_req || lr_req
  val is_store_like = store_req || sc_req || amo_req

  io.memoryUnit.out.ex                                 := io.memoryUnit.in.ex
  io.memoryUnit.out.ex.exception(loadAddrMisaligned)   := is_load_like && lsExecute.out.addr_misaligned
  io.memoryUnit.out.ex.exception(loadAccessFault)      := is_load_like && lsExecute.out.access_fault
  io.memoryUnit.out.ex.exception(loadPageFault)        := is_load_like && lsExecute.out.page_fault
  io.memoryUnit.out.ex.exception(storeAddrMisaligned)  := is_store_like && lsExecute.out.addr_misaligned
  // Fix: this cause was previously driven by addr_misaligned instead of access_fault.
  io.memoryUnit.out.ex.exception(storeAccessFault)     := is_store_like && lsExecute.out.access_fault
  io.memoryUnit.out.ex.exception(storePageFault)       := is_store_like && lsExecute.out.page_fault

  // The trap value of every load/store cause is the address of the access.
  io.memoryUnit.out.ex.tval(loadAddrMisaligned)  := io.dataMemory.out.addr
  io.memoryUnit.out.ex.tval(loadAccessFault)     := io.dataMemory.out.addr
  io.memoryUnit.out.ex.tval(loadPageFault)       := io.dataMemory.out.addr
  io.memoryUnit.out.ex.tval(storeAddrMisaligned) := io.dataMemory.out.addr
  io.memoryUnit.out.ex.tval(storeAccessFault)    := io.dataMemory.out.addr
  io.memoryUnit.out.ex.tval(storePageFault)      := io.dataMemory.out.addr

  io.memoryUnit.out.rdata := MuxCase(
    lsExecute.out.rdata,
    Seq(
      (sc_req)  -> sc_invalid,
      (amo_req) -> atom_rdata
    )
  )
}

View File

@ -1,31 +0,0 @@
package cpu.pipeline.memory
import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
/** Memory-ordering unit: decodes instructions whose function unit is `FuType.mou`.
  *
  * Every valid MOU instruction requests a flush with a target of `pc + 4`. `fence_i` and
  * `sfence_vma` additionally identify which of the two operations it is.
  */
class Mou extends Module {
  val io = IO(new Bundle {
    val in = Input(new Bundle {
      val info = new Info()
      val pc   = UInt(XLEN.W)
    })
    val out = Output(new Bundle {
      val flush      = Bool()
      val fence_i    = Bool()
      val sfence_vma = Bool()
      val target     = UInt(XLEN.W)
    })
  })

  val info  = io.in.info
  val valid = info.valid && info.fusel === FuType.mou

  // Qualify an op-code comparison with the MOU-valid condition.
  def isOp(code: UInt): Bool = valid && info.op === code

  io.out.flush      := valid
  io.out.fence_i    := isOp(MOUOpType.fencei)
  io.out.sfence_vma := isOp(MOUOpType.sfence_vma)
  io.out.target     := io.in.pc + 4.U
}

View File

@ -1,48 +0,0 @@
package cpu.pipeline.memory
import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
import cpu.CpuConfig
/** ALU for atomic memory operations.
  *
  * Combines the value loaded from memory (`rdata`) with the register operand (`src2`) according
  * to the AMO selected by `info.op`, producing the value to store back.
  *
  * When bit 12 of the instruction is 0 the operation is word-sized: the low 32 bits of the result
  * are sign-extended to `XLEN`. The extension width was previously hard-coded to 64; it now
  * follows `XLEN`, which is identical when `XLEN == 64`.
  */
class AtomAlu extends Module {
  val io = IO(new Bundle {
    val in = Input(new Bundle {
      val rdata = UInt(XLEN.W) // load data
      val src2  = UInt(XLEN.W) // reg data
      val info  = new Info()
    })
    val out = Output(new Bundle {
      val result = UInt(XLEN.W)
    })
  })

  val src1 = io.in.rdata
  val src2 = io.in.src2
  val op   = io.in.info.op

  // Shared adder: src1 - src2 (as src1 + ~src2 + 1) unless the op is an add.
  val is_sub = !LSUOpType.isAdd(op)
  val sum    = (src1 +& (src2 ^ Fill(XLEN, is_sub))) + is_sub
  val oxr    = src1 ^ src2
  val sltu   = !sum(XLEN)           // no carry out => src1 < src2 (unsigned)
  val slt    = oxr(XLEN - 1) ^ sltu // differing sign bits flip the unsigned result

  val is_word = !io.in.info.inst(12)

  val res = LookupTreeDefault(
    op(5, 0),
    sum,
    List(
      LSUOpType.amoswap -> src2,
      LSUOpType.amoadd  -> sum,
      LSUOpType.amoxor  -> oxr,
      LSUOpType.amoand  -> (src1 & src2),
      LSUOpType.amoor   -> (src1 | src2),
      LSUOpType.amomin  -> Mux(slt(0), src1, src2),
      LSUOpType.amomax  -> Mux(slt(0), src2, src1),
      LSUOpType.amominu -> Mux(sltu(0), src1, src2),
      LSUOpType.amomaxu -> Mux(sltu(0), src2, src1)
    )
  )

  io.out.result := Mux(is_word, SignedExtend(res(31, 0), XLEN), res(XLEN - 1, 0))
}

View File

@ -1,143 +0,0 @@
package cpu.pipeline.memory
import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
import cpu.CpuConfig
/** Single memory access: builds the data-memory request and formats the response.
  *
  * For the access described by `io.in` (enable, address, write data, op) this module:
  *  - checks natural alignment for the access size encoded in `op(1, 0)`,
  *  - replicates the store data across the bus and shifts the byte strobes by the address offset,
  *  - shifts the read data down by the address offset and sign- or zero-extends partial loads.
  *
  * The request is suppressed (`dataMemory.out.en` low) when the address is misaligned.
  *
  * The read-data shift table is selected by `XLEN` at elaboration time. Previously the 64-bit
  * table was always built, extracting bits up to 63 from `rdata` even in the `XLEN == 32`
  * configuration that the rest of this module supports.
  */
class LsExecute extends Module {
  val io = IO(new Bundle {
    val dataMemory = new Lsu_DataMemory()
    val in = Input(new Bundle {
      val mem_en   = Bool()
      val mem_addr = UInt(XLEN.W)
      val wdata    = UInt(XLEN.W)
      val info     = new Info()
    })
    val out = Output(new Bundle {
      val addr_misaligned = Bool()
      val access_fault    = Bool()
      val page_fault      = Bool()
      val rdata           = UInt(XLEN.W)
      val ready           = Bool()
    })
  })

  /** Byte strobes for a 64-bit bus: size-dependent mask shifted by the byte offset `addr(2, 0)`. */
  def genWmask(addr: UInt, sizeEncode: UInt): UInt = {
    LookupTree(
      sizeEncode,
      List(
        "b00".U -> 0x1.U, // 0001 << addr(2:0)
        "b01".U -> 0x3.U, // 0011
        "b10".U -> 0xf.U, // 1111
        "b11".U -> 0xff.U // 11111111
      )
    ) << addr(2, 0)
  }

  /** Store data for a 64-bit bus: the addressed part of `data` replicated to fill 64 bits. */
  def genWdata(data: UInt, sizeEncode: UInt): UInt = {
    LookupTree(
      sizeEncode,
      List(
        "b00".U -> Fill(8, data(7, 0)),
        "b01".U -> Fill(4, data(15, 0)),
        "b10".U -> Fill(2, data(31, 0)),
        "b11".U -> data
      )
    )
  }

  /** Byte strobes for a 32-bit bus: size-dependent mask shifted by the byte offset `addr(1, 0)`. */
  def genWmask32(addr: UInt, sizeEncode: UInt): UInt = {
    LookupTree(
      sizeEncode,
      List(
        "b00".U -> 0x1.U, // 0001 << addr(1:0)
        "b01".U -> 0x3.U, // 0011
        "b10".U -> 0xf.U // 1111
      )
    ) << addr(1, 0)
  }

  /** Store data for a 32-bit bus: the addressed part of `data` replicated to fill 32 bits. */
  def genWdata32(data: UInt, sizeEncode: UInt): UInt = {
    LookupTree(
      sizeEncode,
      List(
        "b00".U -> Fill(4, data(7, 0)),
        "b01".U -> Fill(2, data(15, 0)),
        "b10".U -> data
      )
    )
  }

  val valid = io.in.mem_en
  val addr  = io.in.mem_addr
  val op    = io.in.info.op

  val is_store     = valid && LSUOpType.isStore(op)
  val partial_load = !is_store && (op =/= LSUOpType.ld)

  // Access size: 00 = byte, 01 = half, 10 = word, 11 = double.
  val size = op(1, 0)

  val req_addr  = if (XLEN == 32) SignedExtend(addr, XLEN) else addr
  val req_wdata = if (XLEN == 32) genWdata32(io.in.wdata, size) else genWdata(io.in.wdata, size)
  val req_wmask = if (XLEN == 32) genWmask32(addr, size) else genWmask(addr, size)

  val rdata        = io.dataMemory.in.rdata
  val access_fault = io.dataMemory.in.access_fault
  val page_fault   = io.dataMemory.in.page_fault

  // Shift the read data down by the byte offset. Only the table matching XLEN is elaborated.
  val rdata_result =
    if (XLEN == 32) {
      LookupTree(addr(1, 0), List.tabulate(4)(i => i.U(2.W) -> rdata(31, 8 * i)))
    } else {
      LookupTree(addr(2, 0), List.tabulate(8)(i => i.U(3.W) -> rdata(63, 8 * i)))
    }

  // Sign- or zero-extend loads narrower than a doubleword.
  val rdata_partial_result = LookupTree(
    op,
    List(
      LSUOpType.lb  -> SignedExtend(rdata_result(7, 0), XLEN),
      LSUOpType.lh  -> SignedExtend(rdata_result(15, 0), XLEN),
      LSUOpType.lw  -> SignedExtend(rdata_result(31, 0), XLEN),
      LSUOpType.lbu -> ZeroExtend(rdata_result(7, 0), XLEN),
      LSUOpType.lhu -> ZeroExtend(rdata_result(15, 0), XLEN),
      LSUOpType.lwu -> ZeroExtend(rdata_result(31, 0), XLEN)
    )
  )

  // Natural alignment requirement per access size.
  val addr_aligned = LookupTree(
    op(1, 0),
    List(
      "b00".U -> true.B, // b
      "b01".U -> (addr(0) === 0.U), // h
      "b10".U -> (addr(1, 0) === 0.U), // w
      "b11".U -> (addr(2, 0) === 0.U) // d
    )
  )

  io.dataMemory.out.en    := valid && !io.out.addr_misaligned
  io.dataMemory.out.rlen  := size
  io.dataMemory.out.wen   := is_store
  io.dataMemory.out.wstrb := req_wmask
  io.dataMemory.out.addr  := req_addr
  io.dataMemory.out.wdata := req_wdata

  io.out.ready           := io.dataMemory.in.ready && io.dataMemory.out.en
  io.out.rdata           := Mux(partial_load, rdata_partial_result, rdata_result)
  io.out.addr_misaligned := valid && !addr_aligned
  io.out.access_fault    := valid && access_fault
  io.out.page_fault      := valid && page_fault
}

View File

@ -1,80 +0,0 @@
package cpu.pipeline.writeback
import chisel3._
import chisel3.util._
import cpu.defines.DEBUG
/** Queue of `DEBUG` commit records with two enqueue ports and one dequeue port.
  *
  * A record is enqueued on port `i` when `io.enq(i).wb_rf_wen` is non-zero. Port 1 is written
  * one slot after port 0 when both enqueue in the same cycle. The head record is presented on
  * `io.deq`; it is dequeued, and its stored `wb_rf_wen` cleared, whenever the presented
  * `wb_rf_wen` is non-zero. While the buffer is empty, `io.deq.wb_rf_wen` is driven to zero.
  *
  * @param depth
  *   Number of records the buffer can hold.
  */
class CommitBuffer(
  depth: Int = 128)
    extends Module {
  val io = IO(new Bundle {
    val flush = Input(Bool())
    val enq   = Flipped(Vec(2, new DEBUG()))
    val deq   = new DEBUG()
  })

  val ram        = RegInit(VecInit(Seq.fill(depth)(0.U.asTypeOf(new DEBUG()))))
  val enq_ptr    = RegInit(0.U(log2Ceil(depth).W))
  val deq_ptr    = RegInit(0.U(log2Ceil(depth).W))
  val maybe_full = RegInit(false.B)

  // Equal pointers mean either empty or full; maybe_full disambiguates.
  val ptr_match = enq_ptr === deq_ptr
  val empty     = ptr_match && !maybe_full
  val full      = ptr_match && maybe_full

  val do_enq = VecInit(io.enq.map(_.wb_rf_wen.orR))
  val do_deq = WireDefault(io.deq.wb_rf_wen.orR)

  // ---- enqueue writes (port 0 first, port 1 in the following slot) -------------------------
  when(do_enq(0)) {
    ram(enq_ptr) := io.enq(0)
  }
  val enq1_ptr = enq_ptr + do_enq(0)
  when(do_enq(1)) {
    ram(enq1_ptr) := io.enq(1)
  }

  // ---- full tracking; the port-1 rule comes last and therefore wins ------------------------
  when(do_enq(0) =/= do_deq) {
    maybe_full := do_enq(0)
  }
  when(do_enq(1)) {
    maybe_full := true.B
  }

  // ---- pointer update ----------------------------------------------------------------------
  when(io.flush) {
    enq_ptr    := 0.U
    deq_ptr    := 0.U
    maybe_full := false.B
  }.otherwise {
    enq_ptr := enq_ptr + PopCount(do_enq)
    deq_ptr := deq_ptr + do_deq
  }

  // ---- dequeue: placed after the enqueue writes so that this clear takes precedence ---------
  when(do_deq) {
    ram(deq_ptr).wb_rf_wen := 0.U
  }

  when(empty) {
    do_deq           := false.B
    io.deq           := DontCare
    io.deq.wb_rf_wen := 0.U
  }.otherwise {
    io.deq := ram(deq_ptr)
  }
}