删除不必要的文件
This commit is contained in:
parent
7e13a02cb4
commit
a69e4e907d
|
@ -1,119 +0,0 @@
|
|||
package cpu.axi
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
|
||||
/** Synchronous FIFO implemented as a thin wrapper around `chisel3.util.Queue`.
  *
  * The Queue's decoupled handshake is exposed as plain write-enable / read-enable
  * strobes together with `empty` / `full` status flags.
  *
  * @param dataWidth
  *   Bit width of each stored element.
  * @param buffDepth
  *   Number of entries the underlying Queue is created with.
  * @param addrWidth
  *   Not referenced by this implementation; kept so existing instantiations still compile.
  */
class FifoBuffer(
  val dataWidth: Int = 32,
  val buffDepth: Int = 4,
  val addrWidth: Int = 2,
) extends Module {
  val io = IO(new Bundle {
    val wen    = Input(Bool())             // drives the Queue's enq.valid
    val ren    = Input(Bool())             // drives the Queue's deq.ready
    val input  = Input(UInt(dataWidth.W))  // value presented on the enqueue side
    val output = Output(UInt(dataWidth.W)) // value currently at the dequeue side
    val empty  = Output(Bool())            // high while the Queue's count is zero
    val full   = Output(Bool())            // high while the Queue refuses new entries
  })

  val queue = Module(new Queue(UInt(dataWidth.W), buffDepth))

  // Enqueue side: `full` is simply the inverse of the Queue's enq.ready.
  queue.io.enq.bits  := io.input
  queue.io.enq.valid := io.wen
  io.full            := !queue.io.enq.ready

  // Dequeue side.
  queue.io.deq.ready := io.ren
  io.output          := queue.io.deq.bits
  io.empty           := queue.io.count === 0.U
}
|
||||
|
||||
/** Occupancy counter for a FIFO: tracks how many entries are in use and derives
  * the `empty` / `full` flags from that number.
  *
  * The counter register must be able to represent every value in `0..buffDepth`
  * inclusive, otherwise `full` (`count === buffDepth`) could never be reached. Its
  * width is therefore the larger of `addrWidth` and `log2Ceil(buffDepth + 1)`.
  *
  * @param buffDepth
  *   Capacity of the tracked FIFO; `full` is asserted when the count equals this value.
  * @param addrWidth
  *   Minimum width of the counter register. It is widened automatically when it is
  *   too narrow to hold `buffDepth`.
  */
class FifoCount(
  val buffDepth: Int = 4,
  val addrWidth: Int = 2,
) extends Module {
  val io = IO(new Bundle {
    val wen   = Input(Bool())  // request to add one entry
    val ren   = Input(Bool())  // request to remove one entry
    val empty = Output(Bool()) // count is zero
    val full  = Output(Bool()) // count equals buffDepth
  })

  // Wide enough for the value `buffDepth` itself (e.g. depth 4 needs 3 bits, not 2).
  private val countWidth = math.max(addrWidth, log2Ceil(buffDepth + 1))
  val count = RegInit(0.U(countWidth.W))

  io.empty := count === 0.U
  io.full  := count === buffDepth.U

  // A read takes priority over a write when both are requested in the same cycle.
  // NOTE(review): with ren and wen both high on a non-empty, non-full FIFO the count
  // decrements rather than staying unchanged - confirm this matches the paired FIFO.
  when(io.ren && !io.empty) {
    count := count - 1.U
  }.elsewhen(io.wen && !io.full) {
    count := count + 1.U
  }
}
|
||||
|
||||
/** Queue-backed FIFO that additionally reports whether the entry at its head
  * matches an externally supplied value.
  *
  * `related_1` is high when the head entry is valid and its low `relatedDataWidth`
  * bits equal `related_data_1`.
  *
  * @param dataWidth
  *   Bit width of each stored element.
  * @param buffDepth
  *   Number of entries in the underlying Queue; also the count at which `full` asserts.
  * @param addrWidth
  *   Not referenced by this implementation; kept so existing instantiations still compile.
  * @param relatedDataWidth
  *   Number of low-order bits of the head entry that are compared with `related_data_1`.
  */
class FifoBufferValid(
  val dataWidth:        Int = 33,
  val buffDepth:        Int = 6,
  val addrWidth:        Int = 3,
  val relatedDataWidth: Int = 32,
) extends Module {
  val io = IO(new Bundle {
    val wen            = Input(Bool())                   // drives the Queue's enq.valid
    val ren            = Input(Bool())                   // drives the Queue's deq.ready
    val empty          = Output(Bool())                  // Queue count is zero
    val full           = Output(Bool())                  // Queue count equals buffDepth
    val related_1      = Output(Bool())                  // head entry matches related_data_1
    val input          = Input(UInt(dataWidth.W))        // value presented on the enqueue side
    val output         = Output(UInt(dataWidth.W))       // value currently at the dequeue side
    val related_data_1 = Input(UInt(relatedDataWidth.W)) // value compared against the head entry
  })

  val queue = Module(new Queue(UInt(dataWidth.W), buffDepth))

  // Handshake inputs of the Queue.
  queue.io.enq.bits  := io.input
  queue.io.enq.valid := io.wen
  queue.io.deq.ready := io.ren

  // Data and status outputs.
  io.output := queue.io.deq.bits
  io.empty  := queue.io.count === 0.U
  io.full   := queue.io.count === buffDepth.U

  // Match only counts while the head entry is valid.
  io.related_1 := queue.io.deq.valid &&
    (io.related_data_1 === queue.io.deq.bits(relatedDataWidth - 1, 0))
}
|
|
@ -1,30 +0,0 @@
|
|||
package cache
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines._
|
||||
import cpu.defines.Const._
|
||||
import cpu.CpuConfig
|
||||
import cpu.CacheConfig
|
||||
|
||||
/** Top-level cache wrapper: instantiates the instruction cache, the data cache and
  * the AXI bridge that merges their memory ports, and wires them to the CPU-facing
  * and AXI-facing interfaces.
  *
  * @param cpuConfig
  *   Implicit CPU configuration, available to the sub-modules constructed here.
  */
class Cache(implicit cpuConfig: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val inst = Flipped(new Cache_ICache())
    val data = Flipped(new Cache_DCache())
    val axi  = new AXI()
  })

  // Deliberately NOT implicit: two implicit values of the same type in one scope make
  // any implicit lookup for CacheConfig ambiguous. Both are passed explicitly below.
  val iCacheConfig = CacheConfig(cacheType = "icache")
  val dCacheConfig = CacheConfig(cacheType = "dcache")

  val icache        = Module(new ICache(iCacheConfig))
  val dcache        = Module(new DCache(dCacheConfig))
  val axi_interface = Module(new CacheAXIInterface())

  // Cache <-> AXI bridge.
  icache.io.axi <> axi_interface.io.icache
  dcache.io.axi <> axi_interface.io.dcache

  // CPU-facing ports and the shared AXI port.
  io.inst <> icache.io.cpu
  io.data <> dcache.io.cpu
  io.axi  <> axi_interface.io.axi
}
|
|
@ -1,85 +0,0 @@
|
|||
package cache
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines._
|
||||
|
||||
/** Merges the instruction-cache and data-cache AXI ports onto one AXI port.
  *
  *   - AW / W / B: wired straight through to the dcache port; AW and W carry the constant id 1.
  *   - AR: arbitrated between the two caches. The icache request wins when both are valid,
  *     and the choice is held in registers while a request is waiting for `ready`.
  *   - R: routed to icache or dcache according to bit 0 of the returned id.
  */
class CacheAXIInterface extends Module {
  val io = IO(new Bundle {
    val icache = Flipped(new ICache_AXIInterface())
    val dcache = Flipped(new DCache_AXIInterface())
    val axi    = new AXI()
  })

  // ---- AW channel: dcache pass-through ----
  io.axi.aw.valid      := io.dcache.aw.valid
  io.dcache.aw.ready   := io.axi.aw.ready
  io.axi.aw.bits.addr  := io.dcache.aw.bits.addr
  io.axi.aw.bits.len   := io.dcache.aw.bits.len
  io.axi.aw.bits.size  := io.dcache.aw.bits.size
  io.axi.aw.bits.id    := 1.U
  io.axi.aw.bits.burst := 1.U
  io.axi.aw.bits.prot  := 0.U
  io.axi.aw.bits.cache := 0.U
  io.axi.aw.bits.lock  := 0.U

  // ---- W channel: dcache pass-through ----
  io.axi.w.valid     := io.dcache.w.valid
  io.dcache.w.ready  := io.axi.w.ready
  io.axi.w.bits.id   := 1.U
  io.axi.w.bits.data := io.dcache.w.bits.data
  io.axi.w.bits.strb := io.dcache.w.bits.strb
  io.axi.w.bits.last := io.dcache.w.bits.last

  // ---- B channel: dcache pass-through ----
  io.dcache.b.valid     := io.axi.b.valid
  io.axi.b.ready        := io.dcache.b.ready
  io.dcache.b.bits.id   := io.axi.b.bits.id
  io.dcache.b.bits.resp := io.axi.b.bits.resp

  // ---- AR channel: arbitration ----
  // The selection is latched while a request is pending so that the AR signals do not
  // change in the middle of a handshake.
  val ar_sel_lock   = RegInit(false.B)
  val ar_sel_val    = RegInit(false.B)
  val choose_dcache = Mux(ar_sel_lock, ar_sel_val, !io.icache.ar.valid && io.dcache.ar.valid)

  when(io.axi.ar.valid && io.axi.ar.ready) {
    // Handshake completed: release the selection.
    ar_sel_lock := false.B
  }.elsewhen(io.axi.ar.valid) {
    // Request still waiting: hold the current selection.
    ar_sel_lock := true.B
    ar_sel_val  := choose_dcache
  }

  // Selects the dcache-side or icache-side signal according to the arbitration result.
  private def arPick[T <: Data](fromDCache: T, fromICache: T): T =
    Mux(choose_dcache, fromDCache, fromICache)

  io.axi.ar.valid      := arPick(io.dcache.ar.valid, io.icache.ar.valid)
  io.axi.ar.bits.addr  := arPick(io.dcache.ar.bits.addr, io.icache.ar.bits.addr)
  io.axi.ar.bits.len   := arPick(io.dcache.ar.bits.len, io.icache.ar.bits.len)
  io.axi.ar.bits.size  := arPick(io.dcache.ar.bits.size, io.icache.ar.bits.size)
  io.axi.ar.bits.id    := Cat(0.U(3.W), choose_dcache) // id bit 0 marks dcache requests
  io.axi.ar.bits.burst := 1.U
  io.axi.ar.bits.prot  := 0.U
  io.axi.ar.bits.cache := 0.U
  io.axi.ar.bits.lock  := 0.U

  io.dcache.ar.ready := choose_dcache && io.axi.ar.ready
  io.icache.ar.ready := !choose_dcache && io.axi.ar.ready

  // ---- R channel: route by bit 0 of the returned id ----
  val r_sel = io.axi.r.bits.id(0)

  io.icache.r.valid     := !r_sel && io.axi.r.valid
  io.icache.r.bits.id   := io.axi.r.bits.id
  io.icache.r.bits.data := io.axi.r.bits.data
  io.icache.r.bits.resp := io.axi.r.bits.resp
  io.icache.r.bits.last := io.axi.r.bits.last

  io.dcache.r.valid     := r_sel && io.axi.r.valid
  io.dcache.r.bits.id   := io.axi.r.bits.id
  io.dcache.r.bits.data := io.axi.r.bits.data
  io.dcache.r.bits.resp := io.axi.r.bits.resp
  io.dcache.r.bits.last := io.axi.r.bits.last

  io.axi.r.ready := Mux(r_sel, io.dcache.r.ready, io.icache.r.ready)
}
|
|
@ -1,783 +0,0 @@
|
|||
package cache
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import memory._
|
||||
import cpu.CacheConfig
|
||||
import cpu.defines._
|
||||
import cpu.CpuConfig
|
||||
import cpu.defines.Const._
|
||||
import icache.mmu.AccessType
|
||||
|
||||
/*
|
||||
整个宽度为PADDR_WID的地址
|
||||
==========================================================
|
||||
| tag | index | offset |
|
||||
| | | bank index | bank offset |
|
||||
==========================================================
|
||||
|
||||
nway 组,nindex 行
|
||||
==============================================================
|
||||
| valid | dirty | tag | bank 0 | bank 1 | ... | bank n |
|
||||
| 1 | 1 | | | | | |
|
||||
==============================================================
|
||||
| bank |
|
||||
| data 0 | data 1 | ... | data n |
|
||||
| XLEN | XLEN | ... | XLEN |
|
||||
=====================================
|
||||
|
||||
本 CPU 的实现如下:
|
||||
每个bank分为多个dataBlocks,每个dataBlocks的宽度为AXI_DATA_WID,这样能方便的和AXI总线进行交互
|
||||
RV64实现中AXI_DATA_WID为64,所以每个dataBlocks可以存储1个数据
|
||||
为了简化设计,目前*一个bank中只有一个dataBlocks*,即每个bank中只能存储一个数据
|
||||
这样的话dataBlocks可以被简化掉,直接用bank代替
|
||||
//TODO:解决AXI_DATA_WID小于XLEN的情况
|
||||
|
||||
==============================================================
|
||||
| valid | dirty | tag | bank 0 | bank 1 | ... | bank n |
|
||||
| 1 | 1 | | | | | |
|
||||
==============================================================
|
||||
| bank |
|
||||
| dataBlocks |
|
||||
| data 0 |
|
||||
| 64 |
|
||||
===================
|
||||
*/
|
||||
|
||||
/** One pending store held in the data cache's write FIFO.
  *
  * The field order defines the bit layout of the bundle and must not be changed.
  */
class WriteBufferUnit extends Bundle {
  val data = UInt(XLEN.W)         // store data
  val addr = UInt(XLEN.W)         // store address
  val strb = UInt(AXI_STRB_WID.W) // byte strobes for the store
  val size = UInt(AXI_SIZE_WID.W) // transfer size issued on the AW channel
}
|
||||
|
||||
class DCache(cacheConfig: CacheConfig)(implicit cpuConfig: CpuConfig) extends Module with HasTlbConst with HasCSRConst {
|
||||
val nway = cacheConfig.nway
|
||||
val nindex = cacheConfig.nindex
|
||||
val nbank = cacheConfig.nbank
|
||||
val instFetchNum = cpuConfig.instFetchNum
|
||||
val bankOffsetWidth = cacheConfig.bankOffsetWidth
|
||||
val bankIndexWidth = cacheConfig.offsetWidth - bankOffsetWidth
|
||||
val bytesPerBank = cacheConfig.bytesPerBank
|
||||
val tagWidth = cacheConfig.tagWidth
|
||||
val indexWidth = cacheConfig.indexWidth
|
||||
val offsetWidth = cacheConfig.offsetWidth
|
||||
val bitsPerBank = cacheConfig.bitsPerBank
|
||||
val writeFifoDepth = 4
|
||||
|
||||
// 每个bank中存AXI_DATA_WID位的数据
|
||||
// TODO:目前的实现只保证了AXI_DATA_WID为XLEN的情况下的正确性
|
||||
require(AXI_DATA_WID == XLEN, "AXI_DATA_WID should be greater than XLEN")
|
||||
|
||||
def pAddr = new Bundle {
|
||||
val tag = UInt(ppnLen.W)
|
||||
val index = UInt(indexWidth.W)
|
||||
val offset = UInt(offsetWidth.W)
|
||||
}
|
||||
|
||||
def bankAddr = new Bundle {
|
||||
val index = UInt(bankIndexWidth.W)
|
||||
val offset = UInt(bankOffsetWidth.W)
|
||||
}
|
||||
|
||||
val io = IO(new Bundle {
|
||||
val cpu = Flipped(new Cache_DCache())
|
||||
val axi = new DCache_AXIInterface()
|
||||
})
|
||||
|
||||
// dcache的状态机
|
||||
val s_idle :: s_uncached :: s_fence :: s_replace :: s_wait :: s_tlb_refill :: Nil = Enum(6)
|
||||
val state = RegInit(s_idle)
|
||||
|
||||
// ptw的状态机
|
||||
val ptw_handshake :: ptw_send :: ptw_cached :: ptw_uncached :: ptw_check :: ptw_set :: Nil = Enum(6)
|
||||
val ptw_state = RegInit(ptw_handshake)
|
||||
|
||||
// 临时寄存器
|
||||
val ptw_working =
|
||||
ptw_state =/= ptw_handshake &&
|
||||
ptw_state =/= ptw_set &&
|
||||
!(io.cpu.tlb.ptw.pte.bits.access_fault || io.cpu.tlb.ptw.pte.bits.page_fault)
|
||||
val ptw_scratch = RegInit(0.U.asTypeOf(new Bundle {
|
||||
val paddr = pAddr
|
||||
val replace = Bool()
|
||||
val dcache_wait = Bool()
|
||||
}))
|
||||
|
||||
io.cpu.tlb.ptw.vpn.ready := false.B
|
||||
|
||||
// ==========================================================
|
||||
// | ppn | page offset |
|
||||
// ----------------------------------------------------------
|
||||
// | tag | index | offset |
|
||||
// | | | bank index | bank offset |
|
||||
// ==========================================================
|
||||
|
||||
// exe级的index,用于访问第i行的数据
|
||||
val exe_index = io.cpu.exe_addr(indexWidth + offsetWidth - 1, offsetWidth)
|
||||
// mem级的bank的index,用于访问第i个bank的数据
|
||||
val bank_index = io.cpu.addr(bankIndexWidth + bankOffsetWidth - 1, bankOffsetWidth)
|
||||
|
||||
// // 一个bank行内存了一个数据,所以bank_offset恒为0
|
||||
// val bank_offset =
|
||||
// if (bankOffsetWidth > log2Ceil(XLEN / 8))
|
||||
// io.cpu.addr(bankOffsetWidth - 1, log2Ceil(XLEN / 8)) // 保证地址对齐
|
||||
// else
|
||||
// 0.U
|
||||
|
||||
// axi信号中size的宽度,对于cached段,size为3位
|
||||
val cached_size = log2Ceil(AXI_DATA_WID / 8)
|
||||
val cached_len = (nbank - 1)
|
||||
|
||||
// * valid dirty * //
|
||||
// 每行有一个有效位和一个脏位
|
||||
val valid = RegInit(VecInit(Seq.fill(nindex)(VecInit(Seq.fill(nway)(false.B))))) // FIXME:nway放前面会导致栈溢出错误
|
||||
val dirty = RegInit(VecInit(Seq.fill(nindex)(VecInit(Seq.fill(nway)(false.B)))))
|
||||
val lru = RegInit(VecInit(Seq.fill(nindex)(false.B))) // TODO:支持更多路数,目前只支持2路
|
||||
|
||||
// 用于指示哪个行的脏位为真
|
||||
val dirty_index = Wire(UInt(indexWidth.W))
|
||||
dirty_index := PriorityEncoder(dirty.map(_.asUInt.orR))
|
||||
// 用于指示哪个路的脏位为真
|
||||
val dirty_way = dirty(dirty_index)(1)
|
||||
|
||||
// 表示进入fence的写回状态
|
||||
val fence = RegInit(false.B)
|
||||
|
||||
// 读取bank这类sram的数据需要两拍
|
||||
val readsram = RegInit(false.B)
|
||||
|
||||
// 对于uncached段使用writeFifo进行写回
|
||||
val writeFifo = Module(new Queue(new WriteBufferUnit(), writeFifoDepth))
|
||||
val writeFifo_axi_busy = RegInit(false.B)
|
||||
val writeFifo_busy = writeFifo.io.deq.valid // || writeFifo_axi_busy 应该不需要这个判断
|
||||
|
||||
writeFifo.io.enq.valid := false.B
|
||||
writeFifo.io.enq.bits := 0.U.asTypeOf(new WriteBufferUnit())
|
||||
writeFifo.io.deq.ready := false.B
|
||||
|
||||
// * victim cache * //
|
||||
val burst = RegInit(0.U.asTypeOf(new Bundle {
|
||||
val wstrb = Vec(nway, UInt(nbank.W)) // 用于控制写回哪个bank
|
||||
}))
|
||||
|
||||
// 用于解决在replace时发生写回时读写时序不一致的问题
|
||||
val bank_wbindex = RegInit(0.U((offsetWidth - log2Ceil(XLEN / 8)).W))
|
||||
val bank_wbdata = RegInit(VecInit(Seq.fill(nbank)(0.U(XLEN.W))))
|
||||
|
||||
// 是否使用exe的地址进行提前访存
|
||||
val use_next_addr = (state === s_idle) || (state === s_wait)
|
||||
val do_replace = RegInit(false.B)
|
||||
// replace index 表示行的索引
|
||||
val replace_index = Wire(UInt(indexWidth.W))
|
||||
replace_index := io.cpu.addr(indexWidth + offsetWidth - 1, offsetWidth)
|
||||
val replace_wstrb = Wire(Vec(nbank, Vec(nway, UInt(AXI_STRB_WID.W))))
|
||||
val replace_wdata = Mux(state === s_replace, io.axi.r.bits.data, io.cpu.wdata)
|
||||
|
||||
val replace_way = lru(replace_index)
|
||||
|
||||
val replace_dirty = dirty(replace_index)(replace_way)
|
||||
|
||||
val tag_rindex = Mux(use_next_addr, exe_index, replace_index)
|
||||
val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B)))
|
||||
val tag_wdata = RegInit(0.U(tagWidth.W))
|
||||
|
||||
val data = Wire(Vec(nbank, Vec(nway, UInt(XLEN.W))))
|
||||
// 使用寄存器类型才能防止idle时tag出现无法hit的错误
|
||||
val tag = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W))))
|
||||
|
||||
val tag_compare_valid = Wire(Vec(nway, Bool()))
|
||||
val cache_hit = tag_compare_valid.contains(true.B)
|
||||
|
||||
val mmio_read_stall = io.cpu.tlb.uncached && !io.cpu.wen.orR
|
||||
val mmio_write_stall = io.cpu.tlb.uncached && io.cpu.wen.orR && !writeFifo.io.enq.ready
|
||||
val cached_stall = !io.cpu.tlb.uncached && !cache_hit
|
||||
|
||||
val select_way = tag_compare_valid(1)
|
||||
|
||||
val dcache_stall = Mux(
|
||||
state === s_idle,
|
||||
Mux(
|
||||
io.cpu.en,
|
||||
(cached_stall || mmio_read_stall || mmio_write_stall || !io.cpu.tlb.hit),
|
||||
io.cpu.fence_i || fence
|
||||
),
|
||||
state =/= s_wait
|
||||
)
|
||||
io.cpu.dcache_ready := !dcache_stall
|
||||
|
||||
val saved_rdata = RegInit(0.U(XLEN.W))
|
||||
|
||||
io.cpu.rdata := Mux(state === s_wait, saved_rdata, data(bank_index)(select_way))
|
||||
|
||||
io.cpu.tlb.vaddr := io.cpu.addr
|
||||
io.cpu.tlb.access_type := Mux(io.cpu.en && io.cpu.wen.orR, AccessType.store, AccessType.load)
|
||||
io.cpu.tlb.en := io.cpu.en
|
||||
|
||||
val bank_raddr = Wire(UInt(indexWidth.W))
|
||||
bank_raddr := Mux(state === s_fence, dirty_index, Mux(use_next_addr, exe_index, replace_index))
|
||||
val tag_raddr = Mux(state === s_fence, dirty_index, tag_rindex)
|
||||
|
||||
val wstrb = Wire(Vec(nindex, (Vec(nway, UInt(AXI_STRB_WID.W)))))
|
||||
wstrb := 0.U.asTypeOf(wstrb)
|
||||
wstrb(bank_index)(select_way) := io.cpu.wstrb
|
||||
|
||||
// bank tagv ram
|
||||
val tagRam = Seq.fill(nway)(Module(new LUTRam(nindex, tagWidth)))
|
||||
for { i <- 0 until nway } {
|
||||
val bank = Seq.fill(nbank)(Module(new SimpleDualPortRam(nindex, AXI_DATA_WID, byteAddressable = true)))
|
||||
for { j <- 0 until nbank } {
|
||||
bank(j).io.ren := true.B
|
||||
bank(j).io.raddr := bank_raddr
|
||||
data(j)(i) := bank(j).io.rdata
|
||||
|
||||
bank(j).io.wen := replace_wstrb(j)(i).orR
|
||||
bank(j).io.waddr := replace_index
|
||||
bank(j).io.wdata := replace_wdata
|
||||
bank(j).io.wstrb := replace_wstrb(j)(i)
|
||||
|
||||
tagRam(i).io.raddr := tag_raddr
|
||||
tag(i) := tagRam(i).io.rdata
|
||||
|
||||
tagRam(i).io.wen := tag_wstrb(i)
|
||||
tagRam(i).io.waddr := replace_index
|
||||
tagRam(i).io.wdata := tag_wdata
|
||||
|
||||
tag_compare_valid(i) :=
|
||||
tag(i) === io.cpu.tlb.ptag && // tag相同
|
||||
valid(replace_index)(i) && // cache行有效位为真
|
||||
io.cpu.tlb.hit // 页表有效
|
||||
|
||||
replace_wstrb(j)(i) := Mux(
|
||||
tag_compare_valid(i) && io.cpu.en && io.cpu.wen.orR && !io.cpu.tlb.uncached && state === s_idle,
|
||||
wstrb(j)(i),
|
||||
Fill(AXI_STRB_WID, burst.wstrb(i)(j))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
val ar = RegInit(0.U.asTypeOf(new AR()))
|
||||
val arvalid = RegInit(false.B)
|
||||
io.axi.ar.bits <> ar
|
||||
io.axi.ar.valid := arvalid
|
||||
val rready = RegInit(false.B)
|
||||
io.axi.r.ready := rready
|
||||
val aw = RegInit(0.U.asTypeOf(new AW()))
|
||||
val awvalid = RegInit(false.B)
|
||||
io.axi.aw.bits <> aw
|
||||
io.axi.aw.valid := awvalid
|
||||
val w = RegInit(0.U.asTypeOf(new W()))
|
||||
val wvalid = RegInit(false.B)
|
||||
io.axi.w.bits <> w
|
||||
io.axi.w.bits.last := w.last && wvalid
|
||||
io.axi.w.valid := wvalid
|
||||
|
||||
io.axi.b.ready := true.B
|
||||
|
||||
val access_fault = RegInit(false.B)
|
||||
val page_fault = RegInit(false.B)
|
||||
// sv39的63-39位需要与第38位相同
|
||||
val addr_err = io.cpu
|
||||
.addr(XLEN - 1, VADDR_WID)
|
||||
.asBools
|
||||
.map(_ =/= io.cpu.addr(VADDR_WID - 1))
|
||||
.reduce(_ || _)
|
||||
|
||||
io.cpu.access_fault := access_fault
|
||||
io.cpu.page_fault := page_fault
|
||||
|
||||
// write buffer
|
||||
when(writeFifo_axi_busy) {
|
||||
when(io.axi.aw.fire) {
|
||||
awvalid := false.B
|
||||
}
|
||||
when(io.axi.w.fire) {
|
||||
wvalid := false.B
|
||||
w.last := false.B
|
||||
}
|
||||
when(io.axi.b.fire) {
|
||||
writeFifo_axi_busy := false.B
|
||||
}
|
||||
}.elsewhen(writeFifo.io.deq.valid) {
|
||||
writeFifo.io.deq.ready := writeFifo.io.deq.valid
|
||||
when(writeFifo.io.deq.fire) {
|
||||
aw.addr := writeFifo.io.deq.bits.addr
|
||||
aw.size := writeFifo.io.deq.bits.size
|
||||
w.data := writeFifo.io.deq.bits.data
|
||||
w.strb := writeFifo.io.deq.bits.strb
|
||||
}
|
||||
aw.len := 0.U
|
||||
awvalid := true.B
|
||||
w.last := true.B
|
||||
wvalid := true.B
|
||||
writeFifo_axi_busy := true.B
|
||||
}
|
||||
|
||||
switch(state) {
|
||||
is(s_idle) {
|
||||
access_fault := false.B // 在idle时清除access_fault
|
||||
page_fault := false.B // 在idle时清除page_fault
|
||||
when(io.cpu.en) {
|
||||
when(addr_err) {
|
||||
access_fault := true.B
|
||||
}.elsewhen(!io.cpu.tlb.hit) {
|
||||
state := s_tlb_refill
|
||||
}.elsewhen(io.cpu.tlb.uncached) {
|
||||
when(io.cpu.wen.orR) {
|
||||
when(writeFifo.io.enq.ready) {
|
||||
writeFifo.io.enq.valid := true.B
|
||||
writeFifo.io.enq.bits.addr := io.cpu.tlb.paddr
|
||||
writeFifo.io.enq.bits.size := io.cpu.rlen
|
||||
writeFifo.io.enq.bits.strb := io.cpu.wstrb
|
||||
writeFifo.io.enq.bits.data := io.cpu.wdata
|
||||
|
||||
when(!io.cpu.complete_single_request) {
|
||||
state := s_wait
|
||||
}
|
||||
}
|
||||
}.elsewhen(!writeFifo_busy) {
|
||||
ar.addr := io.cpu.tlb.paddr
|
||||
ar.len := 0.U
|
||||
ar.size := io.cpu.rlen
|
||||
arvalid := true.B
|
||||
state := s_uncached
|
||||
rready := true.B
|
||||
} // when store buffer busy, read will stop at s_idle but stall pipeline.
|
||||
}.otherwise {
|
||||
when(!cache_hit) {
|
||||
state := s_replace
|
||||
}.otherwise {
|
||||
when(!dcache_stall) {
|
||||
// update lru and mark dirty
|
||||
replace_way := ~select_way
|
||||
when(io.cpu.wen.orR) {
|
||||
dirty(replace_index)(select_way) := true.B
|
||||
}
|
||||
when(!io.cpu.complete_single_request) {
|
||||
saved_rdata := data(bank_index)(select_way)
|
||||
state := s_wait
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}.otherwise {
|
||||
io.cpu.tlb.ptw.vpn.ready := !ptw_working
|
||||
when(io.cpu.fence_i) {
|
||||
// fence.i 需要将所有脏位为true的行写回
|
||||
when(dirty.asUInt.orR) {
|
||||
when(!writeFifo_busy) {
|
||||
state := s_fence
|
||||
readsram := false.B // bank读数据要两拍
|
||||
}
|
||||
}.otherwise {
|
||||
// When every dirty bit is false, fence.i can complete immediately
|
||||
state := s_wait
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
is(s_uncached) {
|
||||
when(arvalid && io.axi.ar.ready) {
|
||||
arvalid := false.B
|
||||
}
|
||||
when(io.axi.r.fire) {
|
||||
rready := false.B
|
||||
saved_rdata := io.axi.r.bits.data
|
||||
access_fault := io.axi.r.bits.resp =/= RESP_OKEY.U
|
||||
state := s_wait
|
||||
}
|
||||
}
|
||||
is(s_fence) {
|
||||
when(fence) {
|
||||
when(io.axi.aw.fire) {
|
||||
awvalid := false.B
|
||||
}
|
||||
when(io.axi.w.fire) {
|
||||
when(w.last) {
|
||||
wvalid := false.B
|
||||
}.otherwise {
|
||||
bank_wbindex := bank_wbindex + 1.U
|
||||
w.data := data(bank_wbindex + 1.U)(dirty_way)
|
||||
when(bank_wbindex + 1.U === (cached_len).U) {
|
||||
w.last := true.B
|
||||
}
|
||||
}
|
||||
}
|
||||
when(io.axi.b.valid) {
|
||||
// TODO: 增加此处的acc_err错误处理
|
||||
// acc_err := io.axi.b.bits.resp =/= RESP_OKEY.U
|
||||
dirty(dirty_index)(dirty_way) := false.B // 写回完成,清除脏位
|
||||
fence := false.B
|
||||
}
|
||||
}.elsewhen(dirty.asUInt.orR) {
|
||||
readsram := true.B
|
||||
when(readsram) {
|
||||
// for axi write
|
||||
readsram := false.B
|
||||
aw.addr := Cat(
|
||||
Mux(dirty_way === 0.U, tagRam(0).io.rdata, tagRam(1).io.rdata),
|
||||
dirty_index,
|
||||
0.U(offsetWidth.W)
|
||||
)
|
||||
aw.len := cached_len.U
|
||||
aw.size := cached_size.U
|
||||
awvalid := true.B
|
||||
w.data := data(0)(dirty_way) // 从第零块bank开始写回
|
||||
w.strb := ~0.U(AXI_STRB_WID.W)
|
||||
w.last := false.B
|
||||
wvalid := true.B
|
||||
bank_wbindex := 0.U
|
||||
fence := true.B
|
||||
}
|
||||
}.otherwise {
|
||||
state := s_wait
|
||||
}
|
||||
}
|
||||
is(s_replace) {
|
||||
// 防止和写队列冲突
|
||||
when(!writeFifo_busy) {
|
||||
when(do_replace) {
|
||||
when(replace_dirty) {
|
||||
when(io.axi.aw.fire) {
|
||||
awvalid := false.B
|
||||
}
|
||||
when(io.axi.w.fire) {
|
||||
when(w.last) {
|
||||
wvalid := false.B
|
||||
}.otherwise {
|
||||
bank_wbindex := bank_wbindex + 1.U
|
||||
w.data := bank_wbdata(bank_wbindex + 1.U)
|
||||
when(bank_wbindex + 1.U === (cached_len).U) {
|
||||
w.last := true.B
|
||||
}
|
||||
}
|
||||
}
|
||||
when(io.axi.b.valid) {
|
||||
// TODO: 增加此处的acc_err错误处理
|
||||
// acc_err := io.axi.b.bits.resp =/= RESP_OKEY.U
|
||||
replace_dirty := false.B // 写回完成,清除脏位
|
||||
}
|
||||
} //上面都是写回部分的代码
|
||||
when(io.axi.ar.fire) {
|
||||
tag_wstrb(replace_way) := false.B
|
||||
arvalid := false.B
|
||||
}
|
||||
when(io.axi.r.fire) {
|
||||
when(io.axi.r.bits.last) {
|
||||
rready := false.B
|
||||
burst.wstrb(replace_way) := 0.U
|
||||
}.otherwise {
|
||||
burst.wstrb(replace_way) := burst.wstrb(replace_way) << 1
|
||||
}
|
||||
}
|
||||
when(
|
||||
(!replace_dirty || io.axi.b.valid) && // 不需要替换或写回完成
|
||||
((io.axi.r.valid && io.axi.r.bits.last) || !rready) // 读取完成
|
||||
) {
|
||||
valid(replace_index)(replace_way) := true.B
|
||||
do_replace := false.B
|
||||
ptw_scratch.replace := false.B
|
||||
when(ptw_working && io.cpu.tlb.ptw.access_type =/= AccessType.fetch) {
|
||||
// ptw复用的模式
|
||||
state := s_tlb_refill
|
||||
}.otherwise {
|
||||
when(ptw_scratch.dcache_wait && !io.cpu.complete_single_request) {
|
||||
state := s_wait
|
||||
}.otherwise {
|
||||
ptw_scratch.dcache_wait := false.B
|
||||
state := s_idle
|
||||
}
|
||||
}
|
||||
}
|
||||
}.otherwise {
|
||||
// 增加了一拍,用于sram读取数据
|
||||
readsram := true.B
|
||||
when(readsram) {
|
||||
readsram := false.B
|
||||
do_replace := true.B
|
||||
ar.len := cached_len.U
|
||||
ar.size := cached_size.U // 8 字节
|
||||
arvalid := true.B
|
||||
rready := true.B
|
||||
burst.wstrb(replace_way) := 1.U // 先写入第一块bank
|
||||
tag_wstrb(replace_way) := true.B
|
||||
when(!ptw_working) {
|
||||
// dcache的普通模式
|
||||
// for ar axi
|
||||
ar.addr := Cat(io.cpu.tlb.paddr(PADDR_WID - 1, offsetWidth), 0.U(offsetWidth.W))
|
||||
tag_wdata := io.cpu.tlb.ptag
|
||||
}.otherwise {
|
||||
// ptw复用的模式
|
||||
ar.addr := Cat(ptw_scratch.paddr.tag, ptw_scratch.paddr.index, 0.U(offsetWidth.W))
|
||||
tag_wdata := ptw_scratch.paddr.tag
|
||||
}
|
||||
when(replace_dirty) {
|
||||
// cache行的脏位为真时需要写回,备份一下cache行,便于处理读写时序问题
|
||||
(0 until nbank).map(i => bank_wbdata(i) := data(i)(replace_way))
|
||||
aw.addr := Cat(tag(replace_way), replace_index, 0.U(offsetWidth.W))
|
||||
aw.len := cached_len.U
|
||||
aw.size := cached_size.U
|
||||
awvalid := true.B
|
||||
w.data := data(0)(replace_way)
|
||||
w.strb := ~0.U(AXI_STRB_WID.W)
|
||||
w.last := false.B
|
||||
wvalid := true.B
|
||||
bank_wbindex := 0.U
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
is(s_wait) {
|
||||
// 等待流水线的allow_to_go信号,防止多次发出读、写请求
|
||||
io.cpu.tlb.ptw.vpn.ready := !ptw_working
|
||||
ptw_scratch.dcache_wait := true.B
|
||||
when(io.cpu.complete_single_request) {
|
||||
ptw_scratch.dcache_wait := false.B
|
||||
access_fault := false.B // 清除access_fault
|
||||
page_fault := false.B // 清除page_fault
|
||||
state := s_idle
|
||||
}
|
||||
}
|
||||
is(s_tlb_refill) {
|
||||
io.cpu.tlb.ptw.vpn.ready := !ptw_working
|
||||
when(io.cpu.tlb.access_fault) {
|
||||
access_fault := true.B
|
||||
state := s_wait
|
||||
}.elsewhen(io.cpu.tlb.page_fault) {
|
||||
page_fault := true.B
|
||||
state := s_wait
|
||||
}.otherwise {
|
||||
when(io.cpu.tlb.hit) {
|
||||
state := s_idle
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ==========================================================
|
||||
// 实现页表访问,回填tlb
|
||||
val satp = io.cpu.tlb.csr.satp.asTypeOf(satpBundle)
|
||||
val mstatus = io.cpu.tlb.csr.mstatus.asTypeOf(new Mstatus)
|
||||
val mode = Mux(io.cpu.tlb.access_type === AccessType.fetch, io.cpu.tlb.csr.imode, io.cpu.tlb.csr.dmode)
|
||||
val sum = mstatus.sum
|
||||
val mxr = mstatus.mxr
|
||||
val vpn = io.cpu.tlb.ptw.vpn.bits.asTypeOf(vpnBundle)
|
||||
val access_type = io.cpu.tlb.ptw.access_type
|
||||
val ppn = RegInit(0.U(ppnLen.W))
|
||||
val vpn_index = RegInit(0.U(log2Up(level).W)) // 页表访问的层级
|
||||
val pte = RegInit(0.U.asTypeOf(pteBundle)) // 页表项
|
||||
|
||||
io.cpu.tlb.ptw.pte.valid := false.B
|
||||
io.cpu.tlb.ptw.pte.bits := DontCare
|
||||
io.cpu.tlb.ptw.pte.bits.access_fault := false.B
|
||||
io.cpu.tlb.ptw.pte.bits.page_fault := false.B
|
||||
io.cpu.tlb.complete_single_request := io.cpu.complete_single_request
|
||||
require(AXI_DATA_WID == XLEN) // 目前只考虑了AXI_DATA_WID == XLEN的情况
|
||||
|
||||
/** Reports a page fault for the current page-table walk.
  *
  * Asserts `ptw.pte.valid` together with `ptw.pte.bits.page_fault` towards the TLB
  * and sends the walker state machine back to `ptw_handshake`.
  */
def raisePageFault(): Unit = {
  ptw_state                          := ptw_handshake
  io.cpu.tlb.ptw.pte.bits.page_fault := true.B
  io.cpu.tlb.ptw.pte.valid           := true.B
}
|
||||
|
||||
/** Privilege check on the leaf PTE held in `pte`, based on `mode`.
  *
  *   - `ModeS`: faults when the page is a user page (`pte.flag.u`) and `sum` is clear.
  *   - `ModeU`: faults when the page is not a user page.
  *
  * When the check passes the walker advances to `ptw_set`. For any other value of
  * `mode` nothing is assigned here.
  */
def modeCheck(): Unit = {
  // Either raise a page fault or let the walk proceed to the final stage.
  def settle(denied: Bool): Unit =
    when(denied) {
      raisePageFault()
    }.otherwise {
      ptw_state := ptw_set
    }

  switch(mode) {
    is(ModeS) { settle(pte.flag.u && !sum) }
    is(ModeU) { settle(!pte.flag.u) }
  }
}
|
||||
|
||||
switch(ptw_state) {
|
||||
is(ptw_handshake) { // 0
|
||||
// 页表访问虚地址握手
|
||||
when(io.cpu.tlb.ptw.vpn.fire) {
|
||||
vpn_index := (level - 1).U
|
||||
ppn := satp.ppn
|
||||
ptw_state := ptw_send
|
||||
}
|
||||
}
|
||||
is(ptw_send) { // 1
|
||||
val vpnn = Mux1H(
|
||||
Seq(
|
||||
(vpn_index === 0.U) -> vpn.vpn0,
|
||||
(vpn_index === 1.U) -> vpn.vpn1,
|
||||
(vpn_index === 2.U) -> vpn.vpn2
|
||||
)
|
||||
)
|
||||
val ptw_addr = paddrApply(ppn, vpnn).asTypeOf(pAddr)
|
||||
val pte_uncached = AddressSpace.isMMIO(ptw_addr.asUInt)
|
||||
when(pte_uncached) {
|
||||
arvalid := true.B
|
||||
ar.addr := ptw_addr.asUInt
|
||||
ar.size := log2Ceil(AXI_DATA_WID / 8).U // 一个pte的大小是8字节
|
||||
ar.len := 0.U // 读一拍即可
|
||||
rready := true.B
|
||||
ptw_state := ptw_uncached
|
||||
}.otherwise {
|
||||
bank_raddr := ptw_addr.index
|
||||
tagRam.map(_.io.raddr := ptw_addr.index)
|
||||
replace_index := ptw_addr.index
|
||||
ptw_state := ptw_cached
|
||||
ptw_scratch.paddr := ptw_addr
|
||||
ptw_scratch.replace := false.B
|
||||
}
|
||||
}
|
||||
is(ptw_cached) { // 2
|
||||
bank_raddr := ptw_scratch.paddr.index
|
||||
tagRam.map(_.io.raddr := ptw_scratch.paddr.index)
|
||||
replace_index := ptw_scratch.paddr.index
|
||||
for { i <- 0 until nway } {
|
||||
tag_compare_valid(i) :=
|
||||
tag(i) === ptw_scratch.paddr.tag && // tag相同
|
||||
valid(ptw_scratch.paddr.index)(i) // cache行有效位为真
|
||||
}
|
||||
when(!ptw_scratch.replace) {
|
||||
when(cache_hit) {
|
||||
val pte_temp = data(ptw_scratch.paddr.offset.asTypeOf(bankAddr).index)(select_way).asTypeOf(pteBundle)
|
||||
when(!pte_temp.flag.v || !pte_temp.flag.r && pte_temp.flag.w) {
|
||||
raisePageFault()
|
||||
}.otherwise {
|
||||
when(pte_temp.flag.r || pte_temp.flag.x) {
|
||||
// 找到了叶子页
|
||||
pte := pte_temp
|
||||
ptw_state := ptw_check
|
||||
}.otherwise {
|
||||
// 该pte指向下一个页表
|
||||
vpn_index := vpn_index - 1.U
|
||||
when(vpn_index - 1.U < 0.U) {
|
||||
raisePageFault()
|
||||
}.otherwise {
|
||||
ppn := pte_temp.ppn
|
||||
ptw_state := ptw_send
|
||||
}
|
||||
}
|
||||
}
|
||||
}.otherwise {
|
||||
ptw_scratch.replace := true.B
|
||||
state := s_replace // 直接复用dcache的replace状态机,帮我们进行replace操作
|
||||
}
|
||||
}
|
||||
}
|
||||
is(ptw_uncached) { // 3
  when(io.axi.ar.fire) {
    arvalid := false.B
  }
  when(io.axi.r.fire) {
    rready := false.B
    val pte_temp = io.axi.r.bits.data.asTypeOf(pteBundle)
    when(!pte_temp.flag.v || !pte_temp.flag.r && pte_temp.flag.w) {
      // Invalid PTE, or the reserved "writable but not readable" encoding.
      raisePageFault()
    }.otherwise {
      when(pte_temp.flag.r || pte_temp.flag.x) {
        // Found a leaf page.
        pte       := pte_temp
        ptw_state := ptw_check
      }.otherwise {
        // This PTE points to the next-level page table.
        vpn_index := vpn_index - 1.U
        // vpn_index is unsigned, so "vpn_index - 1.U < 0.U" can never be true.
        // Stepping below level 0 is detected by testing for 0 before the decrement.
        when(vpn_index === 0.U) {
          raisePageFault()
        }.otherwise {
          ppn       := pte_temp.ppn
          ptw_state := ptw_send
        }
      }
    }
  }
}
|
||||
is(ptw_check) { // 4
|
||||
// 检查权限
|
||||
switch(access_type) {
|
||||
is(AccessType.load) {
|
||||
when(mxr) {
|
||||
when(!pte.flag.r && !pte.flag.x) {
|
||||
raisePageFault()
|
||||
}.otherwise {
|
||||
modeCheck()
|
||||
}
|
||||
}.otherwise {
|
||||
when(!pte.flag.r) {
|
||||
raisePageFault()
|
||||
}.otherwise {
|
||||
modeCheck()
|
||||
}
|
||||
}
|
||||
}
|
||||
is(AccessType.store) {
|
||||
when(!pte.flag.w) {
|
||||
raisePageFault()
|
||||
}.otherwise {
|
||||
modeCheck()
|
||||
}
|
||||
}
|
||||
is(AccessType.fetch) {
|
||||
when(!pte.flag.x) {
|
||||
raisePageFault()
|
||||
}.otherwise {
|
||||
modeCheck()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
is(ptw_set) { // 5
|
||||
when(
|
||||
vpn_index > 0.U && (
|
||||
vpn_index === 1.U && pte.ppn.asTypeOf(ppnBundle).ppn0.orR ||
|
||||
vpn_index === 2.U && (pte.ppn.asTypeOf(ppnBundle).ppn1.orR || pte.ppn.asTypeOf(ppnBundle).ppn0.orR)
|
||||
)
|
||||
) {
|
||||
raisePageFault()
|
||||
}.elsewhen(!pte.flag.a || access_type === AccessType.store && !pte.flag.d) {
|
||||
raisePageFault() // 使用软件的方式设置脏位以及访问位
|
||||
}.otherwise {
|
||||
// 翻译成功
|
||||
val rmask = WireInit(~0.U(maskLen.W))
|
||||
io.cpu.tlb.ptw.pte.valid := true.B
|
||||
io.cpu.tlb.ptw.pte.bits.rmask := rmask
|
||||
io.cpu.tlb.ptw.pte.bits.entry := pte
|
||||
val ppn_set = Wire(ppnBundle)
|
||||
when(vpn_index === 2.U) {
|
||||
ppn_set.ppn2 := pte.ppn.asTypeOf(ppnBundle).ppn2
|
||||
ppn_set.ppn1 := vpn.vpn1
|
||||
ppn_set.ppn0 := vpn.vpn0
|
||||
rmask := 0.U
|
||||
}.elsewhen(vpn_index === 1.U) {
|
||||
ppn_set.ppn2 := pte.ppn.asTypeOf(ppnBundle).ppn2
|
||||
ppn_set.ppn1 := pte.ppn.asTypeOf(ppnBundle).ppn1
|
||||
ppn_set.ppn0 := vpn.vpn0
|
||||
rmask := Cat(Fill(ppn1Len, true.B), 0.U(ppn0Len.W))
|
||||
}.otherwise {
|
||||
ppn_set := pte.ppn.asTypeOf(ppnBundle)
|
||||
}
|
||||
io.cpu.tlb.ptw.pte.bits.entry.ppn := ppn_set.asUInt
|
||||
|
||||
ptw_state := ptw_handshake
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println("----------------------------------------")
|
||||
println("DCache: ")
|
||||
println("nindex: " + nindex)
|
||||
println("nbank: " + nbank)
|
||||
println("bitsPerBank: " + bitsPerBank)
|
||||
println("bankOffsetWidth: " + bankOffsetWidth)
|
||||
println("bankIndexWidth: " + bankIndexWidth)
|
||||
println("tagWidth: " + tagWidth)
|
||||
println("indexWidth: " + indexWidth)
|
||||
println("offsetWidth: " + offsetWidth)
|
||||
println("----------------------------------------")
|
||||
}
|
|
@ -1,367 +0,0 @@
|
|||
package cache
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import memory._
|
||||
import cpu.CacheConfig
|
||||
import cpu.defines._
|
||||
import cpu.CpuConfig
|
||||
import cpu.defines.Const._
|
||||
|
||||
/*
|
||||
整个宽度为PADDR_WID的地址
|
||||
==========================================================
|
||||
| tag | index | offset |
|
||||
| | | bank index | bank offset |
|
||||
==========================================================
|
||||
|
||||
nway 组,nindex 行
|
||||
======================================================
|
||||
| valid | tag | bank 0 | bank 1 | ... | bank n |
|
||||
| 1 | | | | | |
|
||||
======================================================
|
||||
| bank |
|
||||
| inst 0 | inst 1 | ... | inst n |
|
||||
| 32 | 32 | ... | 32 |
|
||||
=====================================
|
||||
|
||||
本CPU的实现如下:
|
||||
每个bank分为多个instBlocks,每个instBlocks的宽度为AXI_DATA_WID,这样能方便的和AXI总线进行交互
|
||||
RV64实现中AXI_DATA_WID为64,所以每个instBlocks可以存储2条指令
|
||||
而instBlocks的个数会和instFetchNum相关
|
||||
- 当instFetchNum为4时,instBlocks的个数为2
|
||||
- 当instFetchNum为2时,instBlocks的个数为1
|
||||
读取数据时会将一个bank中的所有instBlocks读取出来,然后再将instBlocks中的数据按照偏移量重新排列
|
||||
这样的设计可以保证一个bank的指令数对应instFetchNum
|
||||
|
||||
======================================================
|
||||
| valid | tag | bank 0 | bank 1 | ... | bank n |
|
||||
| 1 | | | | | |
|
||||
======================================================
|
||||
| bank |
|
||||
| instBlocks | instBlocks |
|
||||
| inst 0 | inst 1 | inst 0 | inst 1 |
|
||||
| 32 | 32 | 32 | 32 |
|
||||
=====================================
|
||||
*/
|
||||
|
||||
/** Instruction cache with an AXI refill/uncached read path.
  *
  * Each way stores `nindex` lines; a line is split into `nbank` banks and each bank into
  * `instBlocksPerBank` blocks of `AXI_DATA_WID` bits, so one AXI read beat fills one block.
  * Way selection and replacement use a single bit per index (see the TODOs below).
  *
  * @param cacheConfig
  *   geometry of the cache (ways, indices, banks, field widths)
  * @param cpuConfig
  *   implicit CPU configuration; `instFetchNum` instructions are returned per request
  */
class ICache(cacheConfig: CacheConfig)(implicit cpuConfig: CpuConfig) extends Module with HasTlbConst {
  val nway            = cacheConfig.nway
  val nindex          = cacheConfig.nindex
  val nbank           = cacheConfig.nbank
  val instFetchNum    = cpuConfig.instFetchNum
  val bankOffsetWidth = cacheConfig.bankOffsetWidth
  val bankIndexWidth  = cacheConfig.offsetWidth - bankOffsetWidth
  val bytesPerBank    = cacheConfig.bytesPerBank
  val tagWidth        = cacheConfig.tagWidth
  val indexWidth      = cacheConfig.indexWidth
  val offsetWidth     = cacheConfig.offsetWidth
  val bitsPerBank     = cacheConfig.bitsPerBank

  def pAddr = new Bundle {
    val tag    = UInt(ppnLen.W)
    val index  = UInt(indexWidth.W)
    val offset = UInt(offsetWidth.W)
  }

  def bankAddr = new Bundle {
    val index  = UInt(bankIndexWidth.W)
    val offset = UInt(bankOffsetWidth.W)
  }

  val io = IO(new Bundle {
    val cpu = Flipped(new Cache_ICache())
    val axi = new ICache_AXIInterface()
  })
  require(isPow2(instFetchNum), "ninst must be power of 2")
  require(instFetchNum == bytesPerBank / 4, "instFetchNum must equal to instperbank")
  require(
    bitsPerBank >= AXI_DATA_WID && bitsPerBank % AXI_DATA_WID == 0,
    "bitsPerBank must be a non-zero multiple of AXI_DATA_WID"
  )

  // A bank is bitsPerBank wide and holds instFetchNum instructions.
  // Number of instruction blocks per bank; one block is AXI_DATA_WID wide.
  val instBlocksPerBank = bitsPerBank / AXI_DATA_WID

  val bank_index  = io.cpu.addr(0)(offsetWidth - 1, bankOffsetWidth)
  val bank_offset = io.cpu.addr(0)(bankOffsetWidth - 1, log2Ceil(INST_WID / 8)) // the low 2 bits of the PC are always 0

  // * fsm * //
  val s_idle :: s_uncached :: s_replace :: s_wait :: s_fence :: s_tlb_refill :: Nil = Enum(6)
  val state = RegInit(s_idle)

  // nway ways, nindex lines per way, nbank banks per line; all banks of a line share one valid bit
  val valid = RegInit(VecInit(Seq.fill(nway)(VecInit(Seq.fill(nindex)(false.B)))))

  // * should choose next addr * //
  val use_next_addr = (state === s_idle) || (state === s_wait)

  // Read every bank of the addressed cache entry
  val data        = Wire(Vec(nway, Vec(nbank, Vec(instBlocksPerBank, UInt(AXI_DATA_WID.W)))))
  val data_rindex = io.cpu.addr(use_next_addr)(indexWidth + offsetWidth - 1, offsetWidth)

  val tag       = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W))))
  val tag_raddr = io.cpu.addr(use_next_addr)(indexWidth + offsetWidth - 1, offsetWidth)
  val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B)))
  val tag_wdata = RegInit(0.U(tagWidth.W))

  // * lru * // TODO: verify the LRU logic and make it extensible; only a 2-way cache is supported for now
  val lru = RegInit(VecInit(Seq.fill(nindex)(false.B)))

  val replace_index = io.cpu.addr(0)(indexWidth + offsetWidth - 1, offsetWidth)
  // Way that will be replaced
  val replace_way = lru(replace_index)

  // Selects which bank/block of the line is written; a bank may be written several times
  val replace_wstrb = RegInit(
    VecInit(Seq.fill(nway)(VecInit(Seq.fill(nbank)(VecInit(Seq.fill(instBlocksPerBank)((false.B)))))))
  )

  // * cache hit * //
  val tag_compare_valid   = VecInit(Seq.tabulate(nway)(i => tag(i) === io.cpu.tlb.ptag && valid(i)(replace_index)))
  val cache_hit           = tag_compare_valid.contains(true.B)
  val cache_hit_available = cache_hit && io.cpu.tlb.hit && !io.cpu.tlb.uncached
  val select_way          = tag_compare_valid(1) // 1 when way 1 hits, 0 when way 0 hits // TODO: support more ways

  // Split the selected bank into instFetchNum slices of INST_WID bits each
  val inst_in_bank = VecInit(
    Seq.tabulate(instFetchNum)(i => data(select_way)(bank_index).asUInt((i + 1) * INST_WID - 1, i * INST_WID))
  )

  // Re-order inst_in_bank according to bank_offset so a fetch never crosses a bank:
  // with offset 0 nothing moves; with offset 1 the instruction sent as inst0 is inst1
  // and inst1 carries no data (its valid bit is cleared).
  val inst = VecInit(
    Seq.tabulate(instFetchNum)(i =>
      Mux(
        i.U <= ((instFetchNum - 1).U - bank_offset),
        inst_in_bank(i.U + bank_offset),
        0.U
      )
    )
  )
  val inst_valid = VecInit(
    Seq.tabulate(instFetchNum)(i => cache_hit_available && i.U <= ((instFetchNum - 1).U - bank_offset))
  )

  // Instructions (and their valid bits) held for the CPU while in s_wait
  val rdata_in_wait = RegInit(VecInit(Seq.fill(instFetchNum)(0.U.asTypeOf(new Bundle {
    val inst  = UInt(INST_WID.W)
    val valid = Bool()
  }))))

  // Cached accesses read AXI_DATA_WID bits per beat
  val cached_size = log2Ceil(AXI_DATA_WID / 8)
  val cached_len  = (nbank * instBlocksPerBank - 1)
  // Uncached accesses read a single instruction
  val uncached_size = log2Ceil(INST_WID / 8)
  val uncached_len  = 0

  // bank data rams
  for { i <- 0 until nway } {
    // Every entry has nbank banks and each bank stores instFetchNum instructions.
    // A refill always writes a whole cache line.
    val bank =
      Seq.fill(nbank)(
        Seq.fill(instBlocksPerBank)(
          Module(new SimpleDualPortRam(depth = nindex, width = AXI_DATA_WID, byteAddressable = false))
        )
      )
    for { j <- 0 until nbank } {
      for { k <- 0 until instBlocksPerBank } {
        bank(j)(k).io.ren   := true.B
        bank(j)(k).io.raddr := data_rindex
        data(i)(j)(k)       := bank(j)(k).io.rdata

        bank(j)(k).io.wen   := replace_wstrb(i)(j)(k)
        bank(j)(k).io.waddr := replace_index
        bank(j)(k).io.wdata := io.axi.r.bits.data
        bank(j)(k).io.wstrb := replace_wstrb(i)(j)(k)
      }
    }
  }

  for { i <- 0 until instFetchNum } {
    io.cpu.inst_valid(i) := Mux(state === s_idle, inst_valid(i), rdata_in_wait(i).valid) && io.cpu.req
    io.cpu.inst(i)       := Mux(state === s_idle, inst(i), rdata_in_wait(i).inst)
  }

  for { i <- 0 until nway } {
    // One tag ram per way
    val tagBram = Module(new LUTRam(nindex, tagWidth))
    tagBram.io.raddr := tag_raddr
    tag(i)           := tagBram.io.rdata

    tagBram.io.wen   := tag_wstrb(i)
    tagBram.io.waddr := replace_index
    tagBram.io.wdata := tag_wdata
  }

  io.cpu.icache_stall := Mux(state === s_idle, (!cache_hit_available && io.cpu.req), state =/= s_wait)

  io.cpu.tlb.vaddr                   := io.cpu.addr(0)
  io.cpu.tlb.complete_single_request := io.cpu.complete_single_request
  io.cpu.tlb.en                      := io.cpu.req && (state === s_idle || state === s_tlb_refill)

  val ar      = RegInit(0.U.asTypeOf(new AR()))
  val arvalid = RegInit(false.B)
  ar <> io.axi.ar.bits
  arvalid <> io.axi.ar.valid

  val r      = RegInit(0.U.asTypeOf(new R()))
  val rready = RegInit(false.B)
  r <> io.axi.r.bits
  rready <> io.axi.r.ready

  val access_fault    = RegInit(false.B)
  val page_fault      = RegInit(false.B)
  val addr_misaligned = RegInit(false.B)
  // Address error: bits 63..39 differ from bit 38 (sv39), or the address is misaligned
  val addr_err =
    io.cpu
      .addr(use_next_addr)(XLEN - 1, VADDR_WID)
      .asBools
      .map(_ =/= io.cpu.addr(use_next_addr)(VADDR_WID - 1))
      .reduce(_ || _) ||
      io.cpu.addr(use_next_addr)(log2Ceil(INST_WID / 8) - 1, 0).orR

  io.cpu.access_fault    := access_fault // TODO: report AXI response errors for cached accesses
  io.cpu.page_fault      := page_fault
  io.cpu.addr_misaligned := addr_misaligned

  switch(state) {
    is(s_idle) {
      access_fault    := false.B // cleared while idle
      page_fault      := false.B // cleared while idle
      addr_misaligned := false.B // cleared while idle
      when(io.cpu.req) {
        when(addr_err) {
          when(io.cpu.addr(use_next_addr)(log2Ceil(INST_WID / 8) - 1, 0).orR) {
            addr_misaligned := true.B
          }.otherwise {
            access_fault := true.B
          }
          state                  := s_wait
          rdata_in_wait(0).inst  := Instructions.NOP
          rdata_in_wait(0).valid := true.B
        }.elsewhen(!io.cpu.tlb.hit) {
          state := s_tlb_refill
        }.elsewhen(io.cpu.tlb.uncached) {
          state   := s_uncached
          ar.addr := io.cpu.tlb.paddr
          ar.len  := uncached_len.U
          ar.size := uncached_size.U
          arvalid := true.B
        }.elsewhen(!cache_hit) {
          state := s_replace
          // Refill starts at the beginning of the cache line
          ar.addr := Cat(io.cpu.tlb.paddr(PADDR_WID - 1, offsetWidth), 0.U(offsetWidth.W))
          ar.len  := cached_len.U
          ar.size := cached_size.U
          arvalid := true.B

          replace_wstrb(replace_way).map(_.map(_ := false.B))
          replace_wstrb(replace_way)(0)(0)  := true.B // start with the first block of the first bank
          tag_wstrb(replace_way)            := true.B
          tag_wdata                         := io.cpu.tlb.ptag
          valid(replace_way)(replace_index) := true.B
        }.elsewhen(!io.cpu.icache_stall) {
          replace_way := ~select_way
          when(!io.cpu.complete_single_request) {
            state := s_wait
            // Latch every slot, including slot 0: in s_wait the CPU is fed from
            // rdata_in_wait, and slot 0's valid bit is latched here as well, so leaving
            // its instruction untouched would replay whatever an earlier uncached read
            // or fault NOP had left in the register.
            (0 until instFetchNum).foreach { i =>
              rdata_in_wait(i).inst  := inst(i)
              rdata_in_wait(i).valid := inst_valid(i)
            }
          }
        }
      }
    }
    is(s_uncached) {
      when(io.axi.ar.valid) {
        when(io.axi.ar.ready) {
          arvalid := false.B
          rready  := true.B
        }
      }.elsewhen(io.axi.r.fire) {
        // * uncached not support burst transport * //
        rdata_in_wait(0).inst  := Mux(ar.addr(2), io.axi.r.bits.data(63, 32), io.axi.r.bits.data(31, 0))
        rdata_in_wait(0).valid := true.B
        rready                 := false.B
        access_fault           := io.axi.r.bits.resp =/= RESP_OKEY.U
        state                  := s_wait
      }
    }
    is(s_replace) {
      when(io.axi.ar.valid) {
        when(io.axi.ar.ready) {
          arvalid := false.B
          rready  := true.B
        }
      }.elsewhen(io.axi.r.fire) {
        // * burst transport * //
        when(!io.axi.r.bits.last) {
          // Shift the write mask left: next block of the same bank, or the next bank
          replace_wstrb(replace_way) :=
            ((replace_wstrb(replace_way).asUInt << 1)).asTypeOf(replace_wstrb(replace_way))
        }.otherwise {
          rready := false.B
          replace_wstrb(replace_way).map(_.map(_ := false.B))
          tag_wstrb(replace_way) := false.B
        }
      }.elsewhen(!io.axi.r.ready) {
        state := s_idle
      }
    }
    is(s_wait) {
      // Wait for the pipeline's allow_to_go signal so the read request is not issued twice
      when(io.cpu.complete_single_request) {
        access_fault    := false.B // clear access_fault
        page_fault      := false.B // clear page_fault
        addr_misaligned := false.B // clear addr_misaligned
        state           := s_idle
        (0 until instFetchNum).foreach(i => rdata_in_wait(i).valid := false.B)
      }
    }
    is(s_fence) {
      // Wait for the dcache to finish its write-back and for the AXI read channel to go
      // quiet: the icache may have been in the middle of a read when it was forced here
      when(!io.cpu.dcache_stall && !io.axi.r.valid) {
        state := s_idle
      }
    }
    is(s_tlb_refill) {
      when(io.cpu.tlb.access_fault) {
        access_fault           := true.B
        state                  := s_wait
        rdata_in_wait(0).inst  := Instructions.NOP
        rdata_in_wait(0).valid := true.B
      }.elsewhen(io.cpu.tlb.page_fault) {
        page_fault             := true.B
        state                  := s_wait
        rdata_in_wait(0).inst  := Instructions.NOP
        rdata_in_wait(0).valid := true.B
      }.otherwise {
        when(io.cpu.tlb.hit) {
          state := s_idle
        }
      }
    }
  }

  // * fence * //
  // fence.i has the highest priority: whatever the current state, it forces s_fence
  when(io.cpu.fence_i) {
    valid := 0.U.asTypeOf(valid) // fence.i invalidates the icache, i.e. clears every valid bit
    state := s_fence
  }

  println("----------------------------------------")
  println("ICache: ")
  println("nindex: " + nindex)
  println("nbank: " + nbank)
  println("bankOffsetWidth: " + bankOffsetWidth)
  println("bytesPerBank: " + bytesPerBank)
  println("tagWidth: " + tagWidth)
  println("indexWidth: " + indexWidth)
  println("offsetWidth: " + offsetWidth)
  println("----------------------------------------")
}
|
|
@ -1,64 +0,0 @@
|
|||
package cache.memory
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.CpuConfig
|
||||
|
||||
/** LUT RAM with one write port (port A, with read-back of the write address) and one read
  * port (port B).
  *
  * When `cpuConfig.build` is true the XPM primitive wrapped by [[LUTRamIP]] is instantiated;
  * otherwise a register-vector model with combinational reads is generated.
  *
  * @param depth
  *   how many lines there are in the bank (must be a power of two)
  * @param width
  *   how wide in bits each line is
  * @param cpuConfig
  *   implicit configuration; `build` selects the XPM primitive instead of the register model
  */
class LUTRam(depth: Int, width: Int)(implicit val cpuConfig: CpuConfig) extends Module {
  require(isPow2(depth))
  val waddridth = log2Ceil(depth)
  val io = IO(new Bundle {
    val raddr = Input(UInt(waddridth.W))
    val rdata = Output(UInt(width.W))

    val waddr       = Input(UInt(waddridth.W))
    val wdata       = Input(UInt(width.W))
    val wen         = Input(Bool())
    val writeOutput = Output(UInt(width.W))
  })

  if (cpuConfig.build) {
    val bank = Module(
      new LUTRamIP(
        wdataidth      = width,
        waddridth      = waddridth,
        byteWriteWidth = width,
        numberOfLines  = depth
      )
    )
    bank.io.clka := clock
    bank.io.clkb := clock
    bank.io.rsta := reset
    bank.io.rstb := reset

    bank.io.regcea := false.B
    bank.io.regceb := false.B
    bank.io.ena    := true.B
    bank.io.enb    := true.B

    // Port A: write port.
    bank.io.addra := io.waddr
    bank.io.wea   := io.wen
    bank.io.dina  := io.wdata
    // Expose port A's read data so this variant agrees with the register model below,
    // which returns the line at `waddr` whenever no write is in progress. Previously
    // the output was left as DontCare here.
    io.writeOutput := bank.io.douta

    // Port B: read port.
    bank.io.addrb := io.raddr
    io.rdata      := bank.io.doutb
  } else {
    val bank = RegInit(VecInit(Seq.fill(depth)(0.U(width.W))))
    io.rdata       := bank(io.raddr)
    io.writeOutput := DontCare
    when(io.wen) {
      bank(io.waddr) := io.wdata
    }.otherwise {
      io.writeOutput := bank(io.waddr)
    }
  }
}
|
|
@ -1,65 +0,0 @@
|
|||
package cache.memory
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util.log2Ceil
|
||||
|
||||
/** XPM 2019.2 XPM_MEMORY_DPDISTRAM, at page 124 of UG953(2019.2). By default, this is
  * initialized to all 0.
  *
  * @param wdataidth
  *   the size of the data to store in each line, in bits
  * @param waddridth
  *   the width of the address; must equal `log2Ceil(numberOfLines)` and lie in 1..20
  * @param byteWriteWidth
  *   addressable size of a write, in bits; `wdataidth` must be a multiple of it
  * @param numberOfLines
  *   how many lines there are in the memory (MEMORY_SIZE is `numberOfLines * wdataidth` bits)
  */
class LUTRamIP(wdataidth: Int, waddridth: Int, byteWriteWidth: Int, numberOfLines: Int)
    extends BlackBox(
      Map(
        "ADDR_WIDTH_A"       -> waddridth,
        "ADDR_WIDTH_B"       -> waddridth,
        "MEMORY_SIZE"        -> numberOfLines * wdataidth,
        "WRITE_DATA_WIDTH_A" -> wdataidth,
        "READ_DATA_WIDTH_A"  -> wdataidth,
        "READ_DATA_WIDTH_B"  -> wdataidth,
        "BYTE_WRITE_WIDTH_A" -> byteWriteWidth,
        "READ_LATENCY_A"     -> 0,
        "READ_LATENCY_B"     -> 0,
        "READ_RESET_VALUE_A" -> 0,
        "READ_RESET_VALUE_B" -> 0,
        "CLOCKING_MODE"      -> "common_clock",
      ),
    ) {
  override def desiredName: String = "xpm_memory_dpdistram"
  require(
    waddridth == log2Ceil(numberOfLines),
    "request width should be log 2 of number of lines to request all",
  )
  require(
    wdataidth - (wdataidth / byteWriteWidth) * byteWriteWidth == 0,
    "data width should be a multiple of byte write width",
  )
  // Enforce both ends of the range the message states (previously only the upper bound).
  require(waddridth >= 1 && waddridth <= 20, "request width should be 1 to 20")
  val io = IO(new Bundle {
    val clka = Input(Clock())
    val clkb = Input(Clock())
    val rsta = Input(Reset())
    val rstb = Input(Reset())

    val ena    = Input(Bool())
    val enb    = Input(Bool())
    val regcea = Input(Bool())
    val regceb = Input(Bool())

    val dina  = Input(UInt(wdataidth.W))
    val addra = Input(UInt(waddridth.W))
    val addrb = Input(UInt(waddridth.W))

    // One write-enable bit per byteWriteWidth-sized slice of a line.
    val wea = Input(UInt((wdataidth / byteWriteWidth).W))

    val douta = Output(UInt(wdataidth.W))
    val doutb = Output(UInt(wdataidth.W))
  })
}
|
|
@ -1,37 +0,0 @@
|
|||
package cache.memory
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.CacheConfig
|
||||
|
||||
/** Read-only memory port: an address in, data of type `gen` out.
  *
  * @param gen
  *   type of the data returned by the port
  * @param cacheConfig
  *   implicit cache geometry; the address covers `nindex * nbank` entries
  */
class ReadOnlyPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  val addr = {
    val entries = cacheConfig.nindex * cacheConfig.nbank
    Input(UInt(log2Ceil(entries).W))
  }
  val data = Output(gen)
}
|
||||
|
||||
/** Write-only memory port: address, a single enable bit and the data to write.
  *
  * @param gen
  *   type of the data accepted by the port
  * @param cacheConfig
  *   implicit cache geometry; the address covers `nindex * nbank` entries
  */
class WriteOnlyPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  val addr = {
    val entries = cacheConfig.nindex * cacheConfig.nbank
    Input(UInt(log2Ceil(entries).W))
  }
  val en   = Input(Bool())
  val data = Input(gen)
}
|
||||
|
||||
/** Write-only memory port whose enable is a `bytesPerBank`-bit mask instead of a single bit.
  *
  * @param gen
  *   type of the data accepted by the port
  * @param cacheConfig
  *   implicit cache geometry; the address covers `nindex * nbank` entries
  */
class WriteOnlyMaskPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  val addr = {
    val entries = cacheConfig.nindex * cacheConfig.nbank
    Input(UInt(log2Ceil(entries).W))
  }
  val en   = Input(UInt(cacheConfig.bytesPerBank.W))
  val data = Input(gen)
}
|
||||
|
||||
|
||||
/** Combined read/write memory port sharing one address, with a single enable bit.
  *
  * @param gen
  *   type of both the write data and the read data
  * @param cacheConfig
  *   implicit cache geometry; the address covers `nindex * nbank` entries
  */
class ReadWritePort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  val addr = {
    val entries = cacheConfig.nindex * cacheConfig.nbank
    Input(UInt(log2Ceil(entries).W))
  }
  val en    = Input(Bool())
  val wdata = Input(gen)
  val rdata = Output(gen)
}
|
||||
|
||||
/** Combined read/write memory port sharing one address, with a `bytesPerBank`-bit write mask.
  *
  * @param gen
  *   type of both the write data and the read data
  * @param cacheConfig
  *   implicit cache geometry; the address covers `nindex * nbank` entries
  */
class MaskedReadWritePort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle {
  val addr = {
    val entries = cacheConfig.nindex * cacheConfig.nbank
    Input(UInt(log2Ceil(entries).W))
  }
  val writeMask = Input(UInt(cacheConfig.bytesPerBank.W))
  val wdata     = Input(gen)
  val rdata     = Output(gen)
}
|
|
@ -1,96 +0,0 @@
|
|||
package cache.memory
|
||||
|
||||
import chisel3._
|
||||
import chisel3.stage.{ChiselGeneratorAnnotation, ChiselStage}
|
||||
import chisel3.util._
|
||||
import cpu.CpuConfig
|
||||
|
||||
/** Simple dual port ram, with a port for reading and a port for writing.
  *
  * When `cpuConfig.build` is true the XPM primitive wrapped by [[SimpleDualPortRamIP]] is
  * instantiated; otherwise a `SyncReadMem` model is generated.
  *
  * @param depth
  *   how many lines there are in the ram (must be a power of two)
  * @param width
  *   how wide in bits each line is
  * @param byteAddressable
  *   whether writes are masked per byte (`wstrb` has `width / 8` bits) or per line (1 bit)
  * @param cpuConfig
  *   the implicit configuration for simulation and elaboration
  */
class SimpleDualPortRam(
  depth:           Int,
  width:           Int,
  byteAddressable: Boolean
)(
  implicit
  val cpuConfig: CpuConfig)
    extends Module {
  require(isPow2(depth))
  require(
    width % 8 == 0 || !byteAddressable,
    "if memory is byte addressable, then the data width must be a multiple of 8"
  )
  val waddridth = log2Ceil(depth)

  val io = IO(new Bundle {
    val raddr = Input(UInt(waddridth.W))
    val ren   = Input(Bool())
    val rdata = Output(UInt(width.W))

    val waddr = Input(UInt(waddridth.W))
    val wen   = Input(Bool())
    val wstrb = Input(UInt((if (byteAddressable) width / 8 else 1).W))
    val wdata = Input(UInt(width.W))
  })

  if (cpuConfig.build) {
    val memory = Module(
      new SimpleDualPortRamIP(
        wdataidth      = width,
        byteWriteWidth = if (byteAddressable) 8 else width,
        numberOfLines  = depth,
        waddridth      = waddridth
      )
    )
    memory.io.clka := clock
    memory.io.clkb := clock
    memory.io.rstb := reset

    // Port A: write port.
    memory.io.addra := io.waddr
    memory.io.ena   := io.wen
    memory.io.dina  := io.wdata
    memory.io.wea   := io.wstrb

    // Port B: read port.
    memory.io.addrb  := io.raddr
    memory.io.enb    := io.ren
    memory.io.regceb := false.B
    io.rdata         := memory.io.doutb
  } else {
    assert(
      io.wstrb.orR || !io.wen,
      "when write port enable is high, write vector cannot be all 0"
    )
    // NOTE(review): in both models below `io.rdata` is selected by the current value of
    // `io.ren`, while `SyncReadMem` returns its data one cycle after the read. Confirm
    // that callers which deassert `ren` expect this.
    if (byteAddressable) {
      val bank = SyncReadMem(depth, Vec(width / 8, UInt(8.W)))
      when(io.ren) {
        io.rdata := bank.read(io.raddr).asTypeOf(UInt(width.W))
      }.otherwise {
        io.rdata := DontCare
      }
      when(io.wen) {
        bank.write(io.waddr, io.wdata.asTypeOf(Vec(width / 8, UInt(8.W))), io.wstrb.asBools)
      }
    } else {
      val bank = SyncReadMem(depth, UInt(width.W))

      when(io.ren) {
        io.rdata := bank.read(io.raddr)
      }.otherwise {
        // Zero sized to the port instead of a hard-coded 32-bit literal.
        io.rdata := 0.U(width.W)
      }

      when(io.wen) {
        bank.write(io.waddr, io.wdata)
      }
    }
  }
}
|
|
@ -1,68 +0,0 @@
|
|||
package cache.memory
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util.log2Ceil
|
||||
|
||||
/** Black box for the XPM simple dual port ram (`xpm_memory_sdpram`): port A writes, port B
  * reads with a latency of one cycle.
  *
  * @param wdataidth
  *   width of every data line, in bits
  * @param byteWriteWidth
  *   how many bits to write per mask bit; `wdataidth` must be a multiple of it
  * @param numberOfLines
  *   how many lines of data are in the ram
  * @param waddridth
  *   how wide the address is; must equal `log2Ceil(numberOfLines)` and lie in 1..20
  * @param memoryPrimitive
  *   which primitive to use: "auto", "block", "distributed" or "ultra"
  */
class SimpleDualPortRamIP(
  wdataidth:       Int = 32,
  byteWriteWidth:  Int = 8,
  numberOfLines:   Int,
  waddridth:       Int,
  memoryPrimitive: String = "block",
) extends BlackBox(
      Map(
        "ADDR_WIDTH_A"       -> waddridth,
        "ADDR_WIDTH_B"       -> waddridth,
        "WRITE_DATA_WIDTH_A" -> wdataidth,
        "READ_DATA_WIDTH_B"  -> wdataidth,
        "BYTE_WRITE_WIDTH_A" -> byteWriteWidth,
        "CLOCKING_MODE"      -> "common_clock",
        "READ_LATENCY_B"     -> 1,
        "MEMORY_SIZE"        -> numberOfLines * wdataidth,
        "MEMORY_PRIMITIVE"   -> memoryPrimitive,
      ),
    ) {
  override def desiredName: String = "xpm_memory_sdpram"
  // Enforce both ends of the range the message states (previously only the upper bound).
  require(waddridth >= 1 && waddridth <= 20, "request width should be 1 to 20")
  require(
    wdataidth - (wdataidth / byteWriteWidth) * byteWriteWidth == 0,
    "data width should be a multiple of byte write width",
  )
  require(
    List("auto", "block", "distributed", "ultra").contains(memoryPrimitive),
    "memory primitive should be auto, block ram, dist ram or ultra ram",
  )
  require(
    waddridth == log2Ceil(numberOfLines),
    "request width should be log 2 of number of lines to request all",
  )
  val io = IO(new Bundle {
    // clock and reset
    val clka = Input(Clock())
    val clkb = Input(Clock())
    val rstb = Input(Reset())

    // port A: write
    val addra = Input(UInt(waddridth.W))
    val dina  = Input(UInt(wdataidth.W))
    val ena   = Input(Bool())
    val wea   = Input(UInt((wdataidth / byteWriteWidth).W))

    // port B: read
    val addrb  = Input(UInt(waddridth.W))
    val enb    = Input(Bool())
    val regceb = Input(Bool())
    val doutb  = Output(UInt(wdataidth.W))
  })
}
|
|
@ -1,428 +0,0 @@
|
|||
package icache.mmu
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines._
|
||||
import cpu.defines.Const._
|
||||
import cpu.CacheConfig
|
||||
import cpu.pipeline.execute.CsrTlb
|
||||
import cpu.CpuConfig
|
||||
|
||||
/** 2-bit encoding of the kind of memory access being translated. */
object AccessType {
  /** Hardware type used to carry an access-type value. */
  def apply(): UInt = UInt(2.W)

  def fetch: UInt = "b00".U
  def load:  UInt = "b01".U
  def store: UInt = "b10".U
}
|
||||
|
||||
/** Channel between the TLB and the page-table walker, seen from the TLB side:
  * a decoupled VPN request (with its access type) out, a decoupled PTE response in.
  */
class Tlb_Ptw extends Bundle with HasTlbConst {
  // Request: virtual page number to translate, and the kind of access that caused it.
  val vpn         = Decoupled(UInt(vpnLen.W))
  val access_type = Output(AccessType())

  // Response: fault flags, the page-table entry and the mask returned with it.
  val pte = Flipped(Decoupled(new Bundle {
    val access_fault = Bool()
    val page_fault   = Bool()
    val entry        = pteBundle
    val rmask        = UInt(maskLen.W)
  }))
}
|
||||
|
||||
/** TLB port used by the instruction cache, seen from the TLB side. */
class Tlb_ICache extends Bundle with HasTlbConst {
  // Inputs from the cache.
  val en                      = Input(Bool())
  val vaddr                   = Input(UInt(XLEN.W))
  val complete_single_request = Input(Bool())

  // Translation result and fault flags returned to the cache.
  val uncached     = Output(Bool())
  val hit          = Output(Bool())
  val ptag         = Output(UInt(cacheTagLen.W))
  val paddr        = Output(UInt(PADDR_WID.W))
  val access_fault = Output(Bool())
  val page_fault   = Output(Bool())
}
|
||||
|
||||
/** TLB port used by the data cache: the [[Tlb_ICache]] signals plus the access type,
  * the page-table-walker channel and the CSR bundle.
  */
class Tlb_DCache extends Tlb_ICache {
  val access_type = Input(AccessType())

  // PTW-related ports
  val ptw = new Tlb_Ptw()
  val csr = new CsrTlb()
}
|
||||
|
||||
class Tlb extends Module with HasTlbConst with HasCSRConst {
|
||||
val io = IO(new Bundle {
|
||||
val icache = new Tlb_ICache()
|
||||
val dcache = new Tlb_DCache()
|
||||
val csr = Flipped(new CsrTlb())
|
||||
val sfence_vma = Input(new MouTlb())
|
||||
})
|
||||
|
||||
val satp = io.csr.satp.asTypeOf(satpBundle)
|
||||
val mstatus = io.csr.mstatus.asTypeOf(new Mstatus)
|
||||
val imode = io.csr.imode
|
||||
val dmode = io.csr.dmode
|
||||
// 当SUM=0时,S模式内存访问U模式可访问的页面(U=1)将出现故障。
|
||||
// 当SUM=1时,这些访问是允许的。当基于页面的虚拟内存不生效时,SUM无效。
|
||||
// 请注意,虽然SUM通常在不在S模式下执行时被忽略,但当MPRV=1和MPP=S时,SUM有效。
|
||||
val sum = mstatus.sum
|
||||
// 当MXR=0时,只有标记为可读的页面(R=1)的加载才会成功。
|
||||
// 当MXR=1时,标记为可读或可执行的页面(R=1或X=1)的加载才会成功。
|
||||
// 当基于页面的虚拟内存无效时,MXR无效。
|
||||
val mxr = mstatus.mxr
|
||||
|
||||
// 只有当satp.mode为8且当前模式低于M模式时,才启用虚拟内存
|
||||
val ivm_enabled = (satp.mode === 8.U) && (imode < ModeM)
|
||||
val dvm_enabled = (satp.mode === 8.U) && (dmode < ModeM)
|
||||
|
||||
val itlb = RegInit(0.U.asTypeOf(tlbBundle))
|
||||
val dtlb = RegInit(0.U.asTypeOf(tlbBundle))
|
||||
val tlbl2 = RegInit(VecInit(Seq.fill(cpuConfig.tlbEntries)(0.U.asTypeOf(tlbBundle))))
|
||||
|
||||
val ivpn = io.icache.vaddr(VADDR_WID - 1, pageOffsetLen)
|
||||
val dvpn = io.dcache.vaddr(VADDR_WID - 1, pageOffsetLen)
|
||||
|
||||
// 当(VPN一致)且(ASID一致或PTE.G为1时)且(PTE.V为1)时,TLB命中
|
||||
val itlbl1_hit = vpnEq(itlb.rmask, ivpn, itlb.vpn) &&
|
||||
(itlb.asid === satp.asid || itlb.flag.g) &&
|
||||
itlb.flag.v
|
||||
val dtlbl1_hit = vpnEq(dtlb.rmask, dvpn, dtlb.vpn) &&
|
||||
(dtlb.asid === satp.asid || dtlb.flag.g) &&
|
||||
dtlb.flag.v
|
||||
|
||||
val il2_hit_vec = VecInit(
|
||||
tlbl2.map(tlb =>
|
||||
vpnEq(tlb.rmask, ivpn, tlb.vpn) &&
|
||||
(tlb.asid === satp.asid || tlb.flag.g) &&
|
||||
tlb.flag.v
|
||||
)
|
||||
)
|
||||
val dl2_hit_vec = VecInit(
|
||||
tlbl2.map(tlb =>
|
||||
vpnEq(tlb.rmask, dvpn, tlb.vpn) &&
|
||||
(tlb.asid === satp.asid || tlb.flag.g) &&
|
||||
tlb.flag.v
|
||||
)
|
||||
)
|
||||
|
||||
val search_l1 :: search_l2 :: search_pte :: search_fault :: Nil = Enum(4)
|
||||
val immu_state = RegInit(search_l1)
|
||||
val dmmu_state = RegInit(search_l1)
|
||||
|
||||
// 使用随机的方法替换TLB条目
|
||||
val replace_index = new Counter(cpuConfig.tlbEntries)
|
||||
|
||||
val ipage_fault = RegInit(false.B)
|
||||
val dpage_fault = RegInit(false.B)
|
||||
val iaccess_fault = RegInit(false.B)
|
||||
val daccess_fault = RegInit(false.B)
|
||||
|
||||
// ptw的请求标志,0位为指令tlb请求,1位为数据tlb请求
|
||||
val req_ptw = WireInit(VecInit(Seq.fill(2)(false.B)))
|
||||
|
||||
val ar_sel_lock = RegInit(false.B)
|
||||
val ar_sel_val = RegInit(false.B)
|
||||
// 我们默认优先发送数据tlb的请求
|
||||
val choose_icache = Mux(ar_sel_lock, ar_sel_val, req_ptw(0) && !req_ptw(1))
|
||||
|
||||
when(io.dcache.ptw.vpn.valid) {
|
||||
when(io.dcache.ptw.vpn.ready) {
|
||||
ar_sel_lock := false.B
|
||||
}.otherwise {
|
||||
ar_sel_lock := true.B
|
||||
ar_sel_val := choose_icache
|
||||
}
|
||||
}
|
||||
|
||||
io.icache.hit := false.B
|
||||
io.dcache.hit := false.B
|
||||
io.icache.access_fault := iaccess_fault
|
||||
io.dcache.access_fault := daccess_fault
|
||||
io.icache.page_fault := ipage_fault
|
||||
io.dcache.page_fault := dpage_fault
|
||||
|
||||
// 将ptw模块集成到dcache中,ptw通过dcache的axi进行内存访问
|
||||
io.dcache.ptw.vpn.valid := Mux(choose_icache, req_ptw(0), req_ptw(1))
|
||||
io.dcache.ptw.access_type := Mux(choose_icache, AccessType.fetch, io.dcache.access_type)
|
||||
io.dcache.ptw.vpn.bits := Mux(choose_icache, ivpn, dvpn)
|
||||
io.dcache.ptw.pte.ready := true.B // 恒为true
|
||||
io.dcache.csr <> io.csr
|
||||
|
||||
/** Privilege-mode check for an instruction fetch that hit `itlb`.
  *
  * In S-mode a user page (`u` set) faults when `sum` is 0; in U-mode a
  * non-user page faults. A fault sets `ipage_fault` and moves the instruction
  * FSM to `search_fault`; otherwise `io.icache.hit` is raised. Any mode other
  * than ModeS / ModeU changes nothing.
  */
def imodeCheck(): Unit = {
  val in_s_mode = imode === ModeS
  val in_u_mode = imode === ModeU
  val denied =
    (in_s_mode && itlb.flag.u && sum === 0.U) ||
      (in_u_mode && !itlb.flag.u)

  when(denied) {
    ipage_fault := true.B
    immu_state  := search_fault
  }.elsewhen(in_s_mode || in_u_mode) {
    io.icache.hit := true.B
  }
}
|
||||
|
||||
/** Privilege-mode check for a data access that hit `dtlb`.
  *
  * In S-mode a user page (`u` set) faults when `sum` is 0; in U-mode a
  * non-user page faults. A fault sets `dpage_fault` and moves the data FSM to
  * `search_fault`; otherwise `io.dcache.hit` is raised. Any mode other than
  * ModeS / ModeU changes nothing.
  */
def dmodeCheck(): Unit = {
  val in_s_mode = dmode === ModeS
  val in_u_mode = dmode === ModeU
  val denied =
    (in_s_mode && dtlb.flag.u && sum === 0.U) ||
      (in_u_mode && !dtlb.flag.u)

  when(denied) {
    dpage_fault := true.B
    dmmu_state  := search_fault
  }.elsewhen(in_s_mode || in_u_mode) {
    io.dcache.hit := true.B
  }
}
|
||||
|
||||
// ---------------------------------------------------
// ------ Instruction virtual-to-physical lookup ------
// ---------------------------------------------------
switch(immu_state) {
  is(search_l1) {
    when(io.icache.en) {
      // PMA / PMP checking for the translated access is done in the icache.
      ipage_fault   := false.B
      iaccess_fault := false.B
      when(!ivm_enabled) {
        // Translation off: every request hits.
        io.icache.hit := true.B
      }.elsewhen(!itlbl1_hit) {
        immu_state := search_l2
      }.otherwise {
        // All permission checks needed for a fetch happen here:
        //   0. X bit: only executable pages may be fetched from.
        //   1. M-mode never gets here because translation is disabled.
        //   2. S-mode: a page with U set additionally requires SUM.
        //   3. U-mode: the page must have U set.
        io.icache.hit := false.B // raised only after the checks pass
        when(itlb.flag.x) {
          imodeCheck()
        }.otherwise {
          ipage_fault := true.B
          immu_state  := search_fault
        }
      }
    }
  }

  is(search_l2) {
    when(il2_hit_vec.asUInt.orR) {
      // Copy the first matching shared entry into itlb and retry.
      itlb       := tlbl2(PriorityEncoder(il2_hit_vec))
      immu_state := search_l1
    }.otherwise {
      req_ptw(0) := true.B
      when(choose_icache && io.dcache.ptw.vpn.ready) {
        immu_state := search_pte
      }
    }
  }

  is(search_pte) {
    req_ptw(0) := true.B
    val walk = io.dcache.ptw.pte
    when(walk.valid) {
      when(walk.bits.access_fault || walk.bits.page_fault) {
        // An access fault takes priority over a page fault.
        when(walk.bits.access_fault) {
          iaccess_fault := true.B
        }.otherwise {
          ipage_fault := true.B
        }
        immu_state := search_fault
      }.otherwise {
        // The walker found a mapping: install it in tlbl2 and in itlb.
        val refill = Wire(tlbBundle)
        refill.vpn   := ivpn
        refill.asid  := satp.asid
        refill.flag  := walk.bits.entry.flag
        refill.ppn   := walk.bits.entry.ppn
        refill.rmask := walk.bits.rmask
        tlbl2(replace_index.value) := refill
        itlb := refill
        replace_index.inc()
        immu_state := search_l1
      }
    }
  }

  is(search_fault) {
    // Keep the fault flags up until the icache finishes the request.
    when(io.icache.complete_single_request) {
      ipage_fault   := false.B
      iaccess_fault := false.B
      immu_state    := search_l1
    }
  }
}
|
||||
|
||||
// ---------------------------------------------------
// --------- Data virtual-to-physical lookup ----------
// ---------------------------------------------------
switch(dmmu_state) {
  is(search_l1) {
    when(io.dcache.en) {
      // PMA / PMP checking for the translated access is done in the dcache.
      dpage_fault   := false.B
      daccess_fault := false.B
      when(!dvm_enabled) {
        // Translation off: every request hits.
        io.dcache.hit := true.B
      }.elsewhen(!dtlbl1_hit) {
        dmmu_state := search_l2
      }.otherwise {
        // All permission checks needed for a load / store happen here:
        //   0. load: R must be set, or X when MXR is set;
        //      store: both D and W must be set.
        //   1. M-mode never gets here because translation is disabled.
        //   2. S-mode: a page with U set additionally requires SUM.
        //   3. U-mode: the page must have U set.
        io.dcache.hit := false.B // raised only after the checks pass
        switch(io.dcache.access_type) {
          is(AccessType.load) {
            val readable = dtlb.flag.r || (mxr && dtlb.flag.x)
            when(readable) {
              dmodeCheck()
            }.otherwise {
              dpage_fault := true.B
              dmmu_state  := search_fault
            }
          }
          is(AccessType.store) {
            when(dtlb.flag.d && dtlb.flag.w) {
              dmodeCheck()
            }.otherwise {
              dpage_fault := true.B
              dmmu_state  := search_fault
            }
          }
        }
      }
    }
  }

  is(search_l2) {
    when(dl2_hit_vec.asUInt.orR) {
      // Copy the first matching shared entry into dtlb and retry.
      dtlb       := tlbl2(PriorityEncoder(dl2_hit_vec))
      dmmu_state := search_l1
    }.otherwise {
      req_ptw(1) := true.B
      when(!choose_icache && io.dcache.ptw.vpn.ready) {
        dmmu_state := search_pte
      }
    }
  }

  is(search_pte) {
    req_ptw(1) := true.B
    val walk = io.dcache.ptw.pte
    when(walk.valid) {
      when(walk.bits.access_fault || walk.bits.page_fault) {
        // An access fault takes priority over a page fault.
        when(walk.bits.access_fault) {
          daccess_fault := true.B
        }.otherwise {
          dpage_fault := true.B
        }
        dmmu_state := search_fault
      }.otherwise {
        // The walker found a mapping: install it in tlbl2 and in dtlb.
        val refill = Wire(tlbBundle)
        refill.vpn   := dvpn
        refill.asid  := satp.asid
        refill.flag  := walk.bits.entry.flag
        refill.ppn   := walk.bits.entry.ppn
        refill.rmask := walk.bits.rmask
        tlbl2(replace_index.value) := refill
        dtlb := refill
        replace_index.inc()
        dmmu_state := search_l1
      }
    }
  }

  is(search_fault) {
    // Keep the fault flags up until the dcache finishes the request.
    when(io.dcache.complete_single_request) {
      dpage_fault   := false.B
      daccess_fault := false.B
      dmmu_state    := search_l1
    }
  }
}
|
||||
|
||||
// sfence.vma handling: clear the valid bit of every matching TLB entry.
//
// src1 is the VPN taken from the address operand. The VPN occupies the
// vpnLen bits directly above the page offset, so the slice must end at
// vpnLen + pageOffsetLen - 1; stopping at vpnLen - 1 would drop the upper
// pageOffsetLen VPN bits and miss entries whose upper VPN bits are non-zero.
val src1 = io.sfence_vma.src_info.src1_data(vpnLen + pageOffsetLen - 1, pageOffsetLen)
// src2 is the ASID operand.
val src2 = io.sfence_vma.src_info.src2_data(asidLen - 1, 0)

when(io.sfence_vma.valid) {
  // A zero operand means "do not filter on this field".
  val any_vpn  = !src1.orR
  val any_asid = !src2.orR

  // True when an entry with the given tag fields must be invalidated:
  //   vpn == 0, asid == 0 : every entry
  //   vpn == 0, asid != 0 : entries of that ASID, except global ones
  //   vpn != 0, asid == 0 : entries mapping that VPN
  //   vpn != 0, asid != 0 : entries of that ASID mapping that VPN, except global ones
  def sfenceHit(rmask: UInt, vpn: UInt, asid: UInt, global: Bool): Bool = {
    val vpn_match  = any_vpn || vpnEq(rmask, src1, vpn)
    val asid_match = any_asid || (asid === src2 && !global)
    vpn_match && asid_match
  }

  when(sfenceHit(itlb.rmask, itlb.vpn, itlb.asid, itlb.flag.g)) {
    itlb.flag.v := false.B
  }
  when(sfenceHit(dtlb.rmask, dtlb.vpn, dtlb.asid, dtlb.flag.g)) {
    dtlb.flag.v := false.B
  }
  for (i <- 0 until cpuConfig.tlbEntries) {
    when(sfenceHit(tlbl2(i).rmask, tlbl2(i).vpn, tlbl2(i).asid, tlbl2(i).flag.g)) {
      tlbl2(i).flag.v := false.B
    }
  }
}
|
||||
|
||||
// Physical tag built from the side-private TLB entry: PPN bits where the
// entry's mask is set, VPN bits elsewhere (see maskTag).
val imasktag = maskTag(itlb.rmask, itlb.ppn, ivpn)
val dmasktag = maskTag(dtlb.rmask, dtlb.ppn, dvpn)

// Instruction side: with translation disabled the VPN is used as the tag.
io.icache.ptag     := Mux(!ivm_enabled, ivpn, imasktag)
io.icache.paddr    := Cat(io.icache.ptag, io.icache.vaddr(pageOffsetLen - 1, 0))
io.icache.uncached := AddressSpace.isMMIO(io.icache.vaddr)

// Data side: same scheme.
io.dcache.ptag     := Mux(!dvm_enabled, dvpn, dmasktag)
io.dcache.paddr    := Cat(io.dcache.ptag, io.dcache.vaddr(pageOffsetLen - 1, 0))
io.dcache.uncached := AddressSpace.isMMIO(io.dcache.vaddr)
|
||||
|
||||
}
|
|
@ -1,44 +0,0 @@
|
|||
package cpu.defines
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines.Const._
|
||||
import cpu.CpuConfig
|
||||
|
||||
/** Statistic outputs: three 32-bit CSR-named values plus `int` and `commit` flags. */
class SocStatistic extends Bundle {
  // Shape shared by the 32-bit fields.
  private def word32 = Output(UInt(32.W))

  val csr_count  = word32
  val csr_random = word32
  val csr_cause  = word32
  val int        = Output(Bool())
  val commit     = Output(Bool())
}
|
||||
|
||||
/** Branch-predictor statistic outputs: two 32-bit values, `branch` and `success`. */
class BranchPredictorUnitStatistic extends Bundle {
  // Shape shared by both fields.
  private def word32 = Output(UInt(32.W))

  val branch  = word32
  val success = word32
}
|
||||
|
||||
/** Groups the SoC-level and branch-predictor statistic bundles. */
class CPUStatistic extends Bundle {
  val soc = new SocStatistic()                 // CSR values, interrupt and commit flags
  val bpu = new BranchPredictorUnitStatistic() // branch / success values
}
|
||||
|
||||
/** Top-level statistic bundle: CPU statistics next to cache statistics. */
class GlobalStatistic extends Bundle {
  val cpu   = new CPUStatistic()
  val cache = new CacheStatistic()
}
|
||||
|
||||
/** Instruction-cache statistic outputs: two 32-bit values, `request` and `hit`. */
class ICacheStatistic extends Bundle {
  // Shape shared by both fields.
  private def word32 = Output(UInt(32.W))

  val request = word32
  val hit     = word32
}
|
||||
|
||||
/** Data-cache statistic outputs: two 32-bit values, `request` and `hit`. */
class DCacheStatistic extends Bundle {
  // Shape shared by both fields.
  private def word32 = Output(UInt(32.W))

  val request = word32
  val hit     = word32
}
|
||||
|
||||
/** Groups the instruction-cache and data-cache statistic bundles. */
class CacheStatistic extends Bundle {
  val icache = new ICacheStatistic()
  val dcache = new DCacheStatistic()
}
|
|
@ -1,102 +0,0 @@
|
|||
package cpu.defines
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines.Const._
|
||||
import cpu.CacheConfig
|
||||
import cpu.CpuConfig
|
||||
|
||||
/** Widths, bundle shapes and small helpers shared by the TLB / page-table code.
  *
  * The numeric notes next to the constants are the values for a 32-bit
  * physical address (PADDR_WID = 32), as stated by the original comments.
  */
trait HasTlbConst extends CoreParameter {
  val PAddrBits     = PADDR_WID // 32
  val level         = 3
  val pageOffsetLen = 12 // 4 KiB pages -> 12 offset bits

  // Physical page number, split into three fields.
  val ppn0Len = 9
  val ppn1Len = 9
  val ppn2Len = PAddrBits - pageOffsetLen - ppn0Len - ppn1Len // 2
  val ppnLen  = Seq(ppn2Len, ppn1Len, ppn0Len).sum            // 20

  // Virtual page number, split into three fields.
  val vpn2Len = 9
  val vpn1Len = 9
  val vpn0Len = 9
  val vpnLen  = Seq(vpn2Len, vpn1Len, vpn0Len).sum // 27

  // Width of the per-entry mask covering the two lower page-number fields.
  val maskLen = ppn1Len + ppn0Len // 18

  val satpLen     = XLEN
  val satpModeLen = 4
  val asidLen     = 16
  val flagLen     = 8

  val ptEntryLen = XLEN
  val satpResLen = XLEN - ppnLen - satpModeLen - asidLen
  val pteResLen  = XLEN - ppnLen - 2 - flagLen

  val cacheTagLen = PADDR_WID - pageOffsetLen // 32 - 12 = 20
  require(ppnLen == cacheTagLen)

  /** Compares two VPNs under `mask`.
    *
    * @param mask   mask for the lower VPN bits (the top `vpn2Len` bits are always compared)
    * @param vpn    VPN being looked up
    * @param tlbvpn VPN stored in the TLB entry
    * @return true when the two VPNs agree on every bit selected by the extended mask
    */
  def vpnEq(mask: UInt, vpn: UInt, tlbvpn: UInt) = {
    val fullmask = Cat(Fill(vpn2Len, true.B), mask)
    ((vpn ^ tlbvpn) & fullmask) === 0.U
  }

  /** Builds a tag from a PPN and a VPN under `mask`.
    *
    * @param mask mask for the lower bits (the top `ppn2Len` bits always come from `ppn`)
    * @param ppn  physical page number of the TLB entry
    * @param vpn  VPN of the access
    * @return `ppn` bits where the extended mask is 1, `vpn` bits where it is 0
    */
  def maskTag(mask: UInt, ppn: UInt, vpn: UInt) = {
    val fullmask   = Cat(Fill(ppn2Len, true.B), mask)
    val from_entry = ppn & fullmask
    val from_vaddr = vpn & ~fullmask
    from_entry | from_vaddr
  }

  /** VPN split into its three fields, most significant first. */
  def vpnBundle = new Bundle {
    val vpn2 = UInt(vpn2Len.W)
    val vpn1 = UInt(vpn1Len.W)
    val vpn0 = UInt(vpn0Len.W)
  }

  /** PPN split into its three fields, most significant first. */
  def ppnBundle = new Bundle {
    val ppn2 = UInt(ppn2Len.W)
    val ppn1 = UInt(ppn1Len.W)
    val ppn0 = UInt(ppn0Len.W)
  }

  /** Concatenates `ppn`, `vpnn` and three zero bits, in that order. */
  def paddrApply(ppn: UInt, vpnn: UInt): UInt =
    Cat(ppn, vpnn, 0.U(3.W))

  /** Page-table-entry layout: reserved bits, PPN, RSW and the eight flag bits. */
  def pteBundle = new Bundle {
    val reserved = UInt(pteResLen.W)
    val ppn      = UInt(ppnLen.W)
    val rsw      = UInt(2.W)
    val flag     = flagBundle // d, a, g, u, x, w, r, v
  }

  /** satp register layout: mode, ASID, reserved bits and root PPN. */
  def satpBundle = new Bundle {
    val mode = UInt(satpModeLen.W)
    val asid = UInt(asidLen.W)
    val res  = UInt(satpResLen.W)
    val ppn  = UInt(ppnLen.W)
  }

  /** The eight flag bits, most significant first: d, a, g, u, x, w, r, v. */
  def flagBundle = new Bundle {
    val d = Bool()
    val a = Bool()
    val g = Bool()
    val u = Bool()
    val x = Bool()
    val w = Bool()
    val r = Bool()
    val v = Bool()
  }

  /** One TLB entry: VPN and ASID tag, flags, PPN, and the mask used by vpnEq / maskTag. */
  def tlbBundle = new Bundle {
    val vpn   = UInt(vpnLen.W)
    val asid  = UInt(asidLen.W)
    val flag  = flagBundle
    val ppn   = UInt(ppnLen.W)
    val rmask = UInt(maskLen.W)
  }
}
|
|
@ -1,84 +0,0 @@
|
|||
package cpu.pipeline.decode
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines._
|
||||
import cpu.defines.Const._
|
||||
import cpu.defines.Instructions._
|
||||
import cpu.CpuConfig
|
||||
|
||||
/** Decides whether the second decoded instruction (inst1) may issue together
  * with the first one (inst0).
  *
  * With two decoders, inst1 is allowed only when inst0 is allowed, the
  * instruction FIFO holds at least two entries, and there is no structural
  * conflict, no read-after-write hazard and no instruction that forces single
  * issue. With any other decoder count inst1 is never allowed.
  */
class Issue(implicit val cpuConfig: CpuConfig) extends Module with HasCSRConst {
  val io = IO(new Bundle {
    // Inputs
    val allow_to_go = Input(Bool())
    val instFifo = Input(new Bundle {
      val empty        = Bool()
      val almost_empty = Bool()
    })
    val decodeInst = Input(Vec(cpuConfig.decoderNum, new Info()))
    val execute    = Input(Vec(cpuConfig.commitNum, new MemRead()))
    // Outputs
    val inst1 = Output(new Bundle {
      val allow_to_go = Bool()
    })
  })

  if (cpuConfig.decoderNum == 2) {
    val inst = io.decodeInst

    // Fewer than two instructions are buffered.
    val instFifo_invalid = io.instFifo.empty || io.instFifo.almost_empty

    // Structural conflicts: each of these units accepts one instruction per cycle.
    val lsu_conflict    = inst.map(_.fusel === FuType.lsu).reduce(_ && _) // load/store unit
    val mdu_conflict    = inst.map(_.fusel === FuType.mdu).reduce(_ && _) // multiply/divide unit
    val csr_conflict    = inst.map(_.fusel === FuType.csr).reduce(_ && _) // CSR unit
    val struct_conflict = lsu_conflict || mdu_conflict || csr_conflict

    // Read-after-write hazards.
    // inst1 reads a register that a load still in the execute stage will write.
    // Every execute port is checked; the Vec is sized by commitNum, so fixed
    // indices 0 and 1 would only be valid when commitNum is exactly 2.
    val load_stall = io.execute.map { exe =>
      exe.is_load && exe.reg_waddr.orR &&
        (inst(1).src1_ren && inst(1).src1_raddr === exe.reg_waddr ||
          inst(1).src2_ren && inst(1).src2_raddr === exe.reg_waddr)
    }.reduce(_ || _)
    // inst1 reads the register that inst0 writes.
    val raw_reg =
      inst(0).reg_wen && inst(0).reg_waddr.orR &&
        (inst(0).reg_waddr === inst(1).src1_raddr && inst(1).src1_ren ||
          inst(0).reg_waddr === inst(1).src2_raddr && inst(1).src2_ren)
    val data_conflict = raw_reg || load_stall

    // BRU instructions may only execute as inst0.
    val is_bru = inst.map(_.fusel === FuType.bru).reduce(_ || _)

    // MOU instructions flush the pipeline.
    val is_mou = inst.map(_.fusel === FuType.mou).reduce(_ || _)

    // A CSR write to satp flushes the pipeline. The decoded instructions are
    // iterated directly: the Vec is sized by decoderNum, so indexing it with
    // commitNum is only correct when both settings happen to be equal.
    val write_satp = inst
      .map(i => i.fusel === FuType.csr && CSROpType.isCSROp(i.op) && i.inst(31, 20) === Satp.U)
      .reduce(_ || _)

    // uret / sret / mret flush the pipeline.
    val ret = HasRet(inst(0)) || HasRet(inst(1))

    // CSR-related instructions that flush the pipeline.
    val is_some_csr_inst = write_satp || ret

    // Any of these forces single issue.
    val single_issue = is_mou || is_bru || is_some_csr_inst

    io.inst1.allow_to_go :=
      io.allow_to_go &&      // inst0 may go
        !instFifo_invalid && // at least two instructions are buffered
        !struct_conflict &&  // no structural conflict
        !data_conflict &&    // no read-after-write hazard
        !single_issue        // nothing forces single issue
  } else {
    io.inst1.allow_to_go := false.B
  }
}
|
|
@ -1,48 +0,0 @@
|
|||
package cpu.pipeline.decode
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
|
||||
import cpu.defines._
|
||||
import cpu.defines.Const._
|
||||
import cpu.CpuConfig
|
||||
|
||||
/** Decode-stage handling of jal / jalr.
  *
  * Outputs:
  *  - `jump_register`: the instruction is a jalr whose source register (not x0)
  *    is still being written by an instruction on one of the forward ports
  *  - `jump`: a valid jal, or a valid jalr for which `jump_register` is low
  *  - `jump_target`: src1 + src2, with bit 0 cleared for anything but jal
  */
class JumpCtrl(implicit val cpuConfig: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val in = Input(new Bundle {
      val pc       = UInt(XLEN.W)
      val info     = new Info()
      val src_info = new SrcInfo()
      val forward  = Vec(cpuConfig.commitNum, new DataForwardToDecodeUnit())
    })
    val out = Output(new Bundle {
      val jump_register = Bool()
      val jump          = Bool()
      val jump_target   = UInt(XLEN.W)
    })
  })

  val valid              = io.in.info.valid
  val op                 = io.in.info.op
  val fusel              = io.in.info.fusel
  val jump_inst          = VecInit(BRUOpType.jal).contains(op) && fusel === FuType.bru
  val jump_register_inst = VecInit(BRUOpType.jalr).contains(op) && fusel === FuType.bru

  io.out.jump := (jump_inst || jump_register_inst && !io.out.jump_register) && valid

  // Check the jalr source register against every forward port. The Vec is
  // sized by commitNum, so iterating over it is valid for any configuration,
  // whereas picking ports 0 / 1 by decoderNum only works when the two
  // settings agree.
  val src1_raddr = io.in.info.src1_raddr
  val src1_pending = io.in.forward.map { fwd =>
    (fwd.exe.wen && src1_raddr === fwd.exe.waddr) ||
      (fwd.mem.wen && src1_raddr === fwd.mem.waddr)
  }.reduce(_ || _)
  io.out.jump_register := jump_register_inst && src1_raddr.orR && src1_pending

  // jal uses the sum as is; otherwise bit 0 of the sum is cleared.
  val target_sum = io.in.src_info.src1_data + io.in.src_info.src2_data
  io.out.jump_target := Mux(jump_inst, target_sum, target_sum & ~1.U(XLEN.W))
}
|
|
@ -1,49 +0,0 @@
|
|||
package cpu.pipeline.execute
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines._
|
||||
import cpu.defines.Const._
|
||||
|
||||
/** Execute-stage resolution of conditional branches.
  *
  * Outputs:
  *  - `branch`: the instruction is a valid conditional branch and its condition holds
  *  - `pred_fail`: `pred_branch` differs from `branch`
  *  - `target`: one-hot selection between `branch_target` (mispredicted, taken),
  *    pc + 4 (mispredicted, not taken) and (src1 + src2) with bit 0 cleared
  *    (when `jump_regiser` is set)
  */
class BranchCtrl extends Module {
  val io = IO(new Bundle {
    val in = new Bundle {
      val pc            = Input(UInt(XLEN.W))
      val info          = Input(new Info())
      val src_info      = Input(new SrcInfo())
      val pred_branch   = Input(Bool())
      val jump_regiser  = Input(Bool())
      val branch_target = Input(UInt(XLEN.W))
    }
    val out = new Bundle {
      val branch    = Output(Bool())
      val pred_fail = Output(Bool())
      val target    = Output(UInt(XLEN.W))
    }
  })

  val op   = io.in.info.op
  val src1 = io.in.src_info.src1_data
  val src2 = io.in.src_info.src2_data
  val valid =
    io.in.info.valid && io.in.info.fusel === FuType.bru && BRUOpType.isBranch(op)

  // One adder serves both modes: when is_sub is set it computes
  // src1 + ~src2 + 1, and the extra top bit is the carry out.
  val is_sub = !BRUOpType.isAdd(op)
  val adder = {
    val operand_b = src2 ^ Fill(XLEN, is_sub)
    (src1 +& operand_b) + is_sub
  }
  val xor  = src1 ^ src2
  val sltu = !adder(XLEN)          // no carry out
  val slt  = xor(XLEN - 1) ^ sltu  // flipped when the sign bits differ

  // Base condition per branch type; the invert bit of the op flips it.
  val table = List(
    BRUOpType.getBranchType(BRUOpType.beq)  -> !xor.orR,
    BRUOpType.getBranchType(BRUOpType.blt)  -> slt,
    BRUOpType.getBranchType(BRUOpType.bltu) -> sltu
  )

  io.out.branch :=
    (LookupTree(BRUOpType.getBranchType(op), table) ^ BRUOpType.isBranchInvert(op)) & valid
  io.out.pred_fail := io.in.pred_branch =/= io.out.branch

  io.out.target := Mux1H(
    Seq(
      io.out.pred_fail && io.out.branch,
      io.out.pred_fail && !io.out.branch,
      io.in.jump_regiser
    ),
    Seq(
      io.in.branch_target,
      io.in.pc + 4.U,
      (src1 + src2) & ~1.U(XLEN.W)
    )
  )
}
|
|
@ -1,150 +0,0 @@
|
|||
package cpu.pipeline.fetch
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines.Const._
|
||||
import cpu._
|
||||
import cpu.pipeline.decode.Src12Read
|
||||
import cpu.defines.BRUOpType
|
||||
import cpu.defines.FuOpType
|
||||
import cpu.defines.FuType
|
||||
import cpu.defines.SignedExtend
|
||||
import cpu.pipeline.decode.DecoderBranchPredictorUnit
|
||||
import pipeline.decode.{DecoderBranchPredictorUnit, Src12Read}
|
||||
|
||||
/** Branch outcome reported by the execute unit to the branch predictor:
  * the pc, the pattern-history-table index to update, whether the instruction
  * was a branch, and whether it was taken.
  */
class ExecuteUnitBranchPredictor extends Bundle {
  val bpuConfig = new BranchPredictorConfig()

  // Shape shared by the two flag outputs.
  private def flag = Output(Bool())

  val pc               = Output(UInt(XLEN.W))
  val update_pht_index = Output(UInt(bpuConfig.phtDepth.W))
  val branch_inst      = flag
  val branch           = flag
}
|
||||
|
||||
/** Ports of a branch predictor: the decode-unit interface, a per-fetch-slot
  * pc -> pht_index lookup for the instruction buffer, and the execute-unit
  * update interface.
  */
class BranchPredictorIO(implicit cpuConfig: CpuConfig) extends Bundle {
  val bpuConfig = new BranchPredictorConfig()

  // Decode-unit interface (flipped).
  val decode = Flipped(new DecoderBranchPredictorUnit())

  // One pc input and one pht_index output per fetched instruction.
  val instBuffer = new Bundle {
    val pc        = Input(Vec(cpuConfig.instFetchNum, UInt(XLEN.W)))
    val pht_index = Output(Vec(cpuConfig.instFetchNum, UInt(bpuConfig.phtDepth.W)))
  }

  // Execute-unit update interface (flipped).
  val execute = Flipped(new ExecuteUnitBranchPredictor())
}
|
||||
|
||||
/** Wrapper that instantiates the predictor selected by
  * `cpuConfig.branchPredictor` ("adaptive" or "global") and connects it to `io`.
  *
  * Any other value is rejected during elaboration; otherwise `io` would be
  * left completely undriven.
  */
class BranchPredictorUnit(implicit cpuConfig: CpuConfig) extends Module {
  val io = IO(new BranchPredictorIO())

  cpuConfig.branchPredictor match {
    case "adaptive" =>
      val adaptive_predictor = Module(new AdaptiveTwoLevelPredictor())
      io <> adaptive_predictor.io
    case "global" =>
      val global_predictor = Module(new GlobalBranchPredictor())
      io <> global_predictor.io
    case other =>
      throw new IllegalArgumentException(
        s"""unsupported branchPredictor "$other": expected "adaptive" or "global""""
      )
  }
}
|
||||
|
||||
/** Two-level predictor: a branch-history table indexed by pc bits selects a
  * 2-bit saturating counter in the pattern-history table.
  *
  * @param GHR_DEPTH   number of history records (not used by the body)
  * @param PC_HASH_WID number of pc bits to take (not used by the body)
  * @param PHT_DEPTH   width of one history record; the PHT has 2^PHT_DEPTH counters
  * @param BHT_DEPTH   number of pc bits that index the BHT; it has 2^BHT_DEPTH records
  */
class GlobalBranchPredictor(
  GHR_DEPTH:   Int = 4,
  PC_HASH_WID: Int = 4,
  PHT_DEPTH:   Int = 6,
  BHT_DEPTH:   Int = 4
)(
  implicit
  cpuConfig: CpuConfig)
    extends Module {
  val io = IO(new BranchPredictorIO())

  val strongly_not_taken :: weakly_not_taken :: weakly_taken :: strongly_taken :: Nil = Enum(4)

  val imm = io.decode.info.imm

  io.decode.branch_inst := io.decode.info.valid &&
    FuType.bru === io.decode.info.fusel && BRUOpType.isBranch(io.decode.info.op)
  io.decode.target := io.decode.pc + imm

  // Per-pc history selects the counter.
  val bht       = RegInit(VecInit(Seq.fill(1 << BHT_DEPTH)(0.U(PHT_DEPTH.W))))
  val pht       = RegInit(VecInit(Seq.fill(1 << PHT_DEPTH)(strongly_taken)))
  val bht_index = io.decode.pc(1 + BHT_DEPTH, 2)
  val pht_index = bht(bht_index)

  io.decode.branch :=
    io.decode.branch_inst && (pht(pht_index) === weakly_taken || pht(pht_index) === strongly_taken)

  // BranchPredictorIO also carries PHT-index outputs for the instruction
  // buffer and the decode unit. This predictor looks the index up again at
  // update time and does not need them, but outputs must still be driven, so
  // they get the same BHT lookups the adaptive predictor provides.
  // NOTE(review): the port width comes from bpuConfig.phtDepth while the BHT
  // records are PHT_DEPTH bits wide - confirm the two agree.
  for (i <- 0 until cpuConfig.instFetchNum) {
    io.instBuffer.pht_index(i) := bht(io.instBuffer.pc(i)(1 + BHT_DEPTH, 2))
  }
  io.decode.update_pht_index := pht_index

  val update_bht_index = io.execute.pc(1 + BHT_DEPTH, 2)
  val update_pht_index = bht(update_bht_index)

  when(io.execute.branch_inst) {
    // Shift the outcome into this pc's history.
    bht(update_bht_index) := Cat(bht(update_bht_index)(PHT_DEPTH - 2, 0), io.execute.branch)
    // Move the selected counter one step towards the outcome, saturating at both ends.
    switch(pht(update_pht_index)) {
      is(strongly_not_taken) {
        pht(update_pht_index) := Mux(io.execute.branch, weakly_not_taken, strongly_not_taken)
      }
      is(weakly_not_taken) {
        pht(update_pht_index) := Mux(io.execute.branch, weakly_taken, strongly_not_taken)
      }
      is(weakly_taken) {
        pht(update_pht_index) := Mux(io.execute.branch, strongly_taken, weakly_not_taken)
      }
      is(strongly_taken) {
        pht(update_pht_index) := Mux(io.execute.branch, strongly_taken, weakly_taken)
      }
    }
  }
}
|
||||
|
||||
/** Two-level adaptive predictor whose PHT index travels with the instruction.
  *
  * The BHT is read when an instruction enters the instruction buffer
  * (`io.instBuffer.pht_index`); decode predicts with the index it receives
  * (`io.decode.pht_index`), and execute updates the counter at the index it
  * sends back (`io.execute.update_pht_index`).
  */
class AdaptiveTwoLevelPredictor(
)(
  implicit
  cpuConfig: CpuConfig)
    extends Module {
  val bpuConfig = new BranchPredictorConfig()
  val PHT_DEPTH = bpuConfig.phtDepth
  val BHT_DEPTH = bpuConfig.bhtDepth
  val io        = IO(new BranchPredictorIO())

  // 2-bit counter encodings, in increasing order: 0, 1, 2, 3.
  val strongly_not_taken :: weakly_not_taken :: weakly_taken :: strongly_taken :: Nil = Enum(4)

  val imm = io.decode.info.imm

  io.decode.branch_inst := io.decode.info.valid &&
    FuType.bru === io.decode.info.fusel && BRUOpType.isBranch(io.decode.info.op)
  io.decode.target := io.decode.pc + imm

  val bht       = RegInit(VecInit(Seq.fill(1 << BHT_DEPTH)(0.U(PHT_DEPTH.W))))
  val pht       = RegInit(VecInit(Seq.fill(1 << PHT_DEPTH)(strongly_taken)))
  val pht_index = io.decode.pht_index

  // History lookup for every instruction entering the instruction buffer.
  io.instBuffer.pc.zip(io.instBuffer.pht_index).foreach {
    case (fetch_pc, index_out) =>
      index_out := bht(fetch_pc(1 + BHT_DEPTH, 2))
  }

  // Predict taken when the selected counter is in one of the two taken states.
  io.decode.branch :=
    io.decode.branch_inst && (pht(pht_index) === weakly_taken || pht(pht_index) === strongly_taken)
  io.decode.update_pht_index := bht(io.decode.pc(1 + BHT_DEPTH, 2))

  val update_bht_index = io.execute.pc(1 + BHT_DEPTH, 2)
  val update_pht_index = io.execute.update_pht_index

  when(io.execute.branch_inst) {
    // Shift the outcome into this pc's history.
    bht(update_bht_index) := Cat(bht(update_bht_index)(PHT_DEPTH - 2, 0), io.execute.branch)

    // Saturating 2-bit counter: one step up when taken, one step down when
    // not taken, clamped at strongly_taken / strongly_not_taken.
    val counter = pht(update_pht_index)
    pht(update_pht_index) := Mux(
      io.execute.branch,
      Mux(counter === strongly_taken, strongly_taken, counter + 1.U),
      Mux(counter === strongly_not_taken, strongly_not_taken, counter - 1.U)
    )
  }
}
|
|
@ -1,102 +0,0 @@
|
|||
package cpu.pipeline.fetch
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines.Const._
|
||||
import cpu.{BranchPredictorConfig, CpuConfig}
|
||||
import cpu.pipeline.decode.DecodeUnitInstFifo
|
||||
|
||||
/** One instruction-FIFO entry: the instruction word, its pc, the PHT index
  * looked up for it, and three fetch exception flags.
  */
class IfIdData extends Bundle {
  val bpuConfig = new BranchPredictorConfig()

  // Shape shared by the three exception flags.
  private def flag = Bool()

  val inst            = UInt(XLEN.W)
  val pht_index       = UInt(bpuConfig.phtDepth.W)
  val addr_misaligned = flag
  val access_fault    = flag
  val page_fault      = flag
  val pc              = UInt(XLEN.W)
}
|
||||
|
||||
/** Instruction FIFO between fetch and decode.
  *
  * Up to `instFetchNum` entries are written per cycle and up to two are read.
  * `do_flush` resets both pointers and the fill count.
  *
  * The pointers and the count wrap by plain register overflow, which is only
  * correct for a power-of-two depth; this is checked at elaboration.
  */
class InstFifo(implicit val cpuConfig: CpuConfig) extends Module {
  require(
    isPow2(cpuConfig.instFifoDepth),
    s"instFifoDepth must be a power of two, got ${cpuConfig.instFifoDepth}"
  )

  val io = IO(new Bundle {
    val do_flush = Input(Bool())

    val wen   = Input(Vec(cpuConfig.instFetchNum, Bool()))
    val write = Input(Vec(cpuConfig.instFetchNum, new IfIdData()))
    val full  = Output(Bool())

    val decoderUint = Flipped(new DecodeUnitInstFifo())
  })

  // Storage.
  val buffer = RegInit(VecInit(Seq.fill(cpuConfig.instFifoDepth)(0.U.asTypeOf(new IfIdData()))))

  // Write pointer, read pointer and number of stored entries.
  val enq_ptr = RegInit(0.U(log2Ceil(cpuConfig.instFifoDepth).W))
  val deq_ptr = RegInit(0.U(log2Ceil(cpuConfig.instFifoDepth).W))
  val count   = RegInit(0.U(log2Ceil(cpuConfig.instFifoDepth).W))

  // "Full" is raised while fewer than instFetchNum slots are guaranteed free,
  // so a whole fetch group always fits when `full` is low.
  val full         = count >= (cpuConfig.instFifoDepth - cpuConfig.instFetchNum).U
  val empty        = count === 0.U
  val almost_empty = count === 1.U

  io.full                          := full
  io.decoderUint.info.empty        := empty
  io.decoderUint.info.almost_empty := almost_empty

  // * deq * //
  // Slots that hold no valid entry are presented as all-zero.
  io.decoderUint.inst(0) := Mux(empty, 0.U.asTypeOf(new IfIdData()), buffer(deq_ptr))
  io.decoderUint.inst(1) := Mux(
    empty || almost_empty,
    0.U.asTypeOf(new IfIdData()),
    buffer(deq_ptr + 1.U)
  )

  // Two entries leave only when two are actually stored; with a single entry
  // a request for two falls back to the inst0 decision, so the count cannot
  // underflow.
  val deq_num = MuxCase(
    0.U,
    Seq(
      empty                                            -> 0.U,
      (io.decoderUint.allow_to_go(1) && !almost_empty) -> 2.U,
      io.decoderUint.allow_to_go(0)                    -> 1.U
    )
  )

  when(io.do_flush) {
    deq_ptr := 0.U
  }.otherwise {
    deq_ptr := deq_ptr + deq_num
  }

  // * enq * //
  // Number of entries written this cycle: index of the highest asserted
  // write enable plus one.
  val enq_num = Wire(UInt(log2Ceil(cpuConfig.instFetchNum + 1).W))
  enq_num := 0.U
  for (i <- 0 until cpuConfig.instFetchNum) {
    when(io.wen(i)) {
      buffer(enq_ptr + i.U) := io.write(i)
      enq_num               := (i + 1).U
    }
  }

  when(io.do_flush) {
    enq_ptr := 0.U
  }.otherwise {
    enq_ptr := enq_ptr + enq_num
  }

  // Modular update; the result is truncated to the width of `count`.
  count := Mux(io.do_flush, 0.U, count + enq_num - deq_num)
}
|
|
@ -1,210 +0,0 @@
|
|||
package cpu.pipeline.memory
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines._
|
||||
import cpu.defines.Const._
|
||||
import cpu.CpuConfig
|
||||
import chisel3.util.experimental.BoringUtils
|
||||
|
||||
/** LSU <-> data-memory port.
  *
  * `in` carries the response (fault flags, ready, read data); `out` carries
  * the request (enable, read length, write enable and strobe, address, write data).
  */
class Lsu_DataMemory extends Bundle {
  // Response from data memory.
  val in = Input(new Bundle {
    val access_fault = Bool()
    val page_fault   = Bool()
    val ready        = Bool()
    val rdata        = UInt(XLEN.W)
  })

  // Request to data memory.
  val out = Output(new Bundle {
    val en    = Bool()
    val rlen  = UInt(AXI_LEN_WID.W)
    val wen   = Bool()
    val wstrb = UInt(AXI_STRB_WID.W)
    val addr  = UInt(XLEN.W)
    val wdata = UInt(XLEN.W)
  })
}
|
||||
|
||||
/** LSU <-> memory-unit port.
  *
  * `in` carries the instruction to execute, the current LR reservation
  * (`lr`, `lr_addr`) and `allow_to_go`; `out` carries the result, exception
  * info and the LR reservation write-back (`lr_wen`, `lr_wbit`, `lr_waddr`).
  */
class Lsu_MemoryUnit extends Bundle {
  val in = Input(new Bundle {
    val mem_en   = Bool()
    val info     = new Info()
    val src_info = new SrcInfo()
    val ex       = new ExceptionInfo()

    // Current reservation state.
    val lr      = Bool()
    val lr_addr = UInt(XLEN.W)

    val allow_to_go = Bool()
  })

  val out = Output(new Bundle {
    val ready = Bool()
    val rdata = UInt(XLEN.W)
    val ex    = new ExceptionInfo()
    // Tells the dcache that one request has completed.
    val complete_single_request = Bool()

    // Reservation write-back.
    val lr_wen   = Bool()
    val lr_wbit  = Bool()
    val lr_waddr = UInt(XLEN.W)
  })
}
|
||||
|
||||
/** Load/store unit front-end: sequences plain loads/stores and the atomic
  * operations (LR, SC, AMO) on top of a single [[LsExecute]] memory port.
  *
  * Plain loads/stores and LR are issued directly from the idle state. SC and
  * AMO are driven by a small state machine:
  *
  *   - `s_idle`  : plain access, LR load, or the load half of an AMO.
  *   - `s_sc`    : store half of a store-conditional whose reservation matched.
  *   - `s_amo_a` : one cycle in which the AtomAlu result is registered.
  *   - `s_amo_s` : store half of an AMO (writes the AtomAlu result back).
  *
  * Any fault reported by `lsExecute` forces the machine back to `s_idle` and
  * asserts `ready` so the exception can propagate.
  */
class Lsu(implicit val cpuConfig: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val memoryUnit = new Lsu_MemoryUnit()
    val dataMemory = new Lsu_DataMemory()
  })

  val atomAlu   = Module(new AtomAlu()).io
  val lsExecute = Module(new LsExecute()).io

  // Shorthand for the request fields coming from the memory unit.
  val valid = io.memoryUnit.in.mem_en
  val src1  = io.memoryUnit.in.src_info.src1_data
  val src2  = io.memoryUnit.in.src_info.src2_data
  val imm   = io.memoryUnit.in.info.imm
  val func  = io.memoryUnit.in.info.op
  val inst  = io.memoryUnit.in.info.inst

  // Request classification (decoding is delegated to LSUOpType).
  val store_req = valid & LSUOpType.isStore(func)
  val load_req  = valid & LSUOpType.isLoad(func)
  val atom_req  = valid & LSUOpType.isAtom(func)
  val amo_req   = valid & LSUOpType.isAMO(func)
  val lr_req    = valid & LSUOpType.isLR(func)
  val sc_req    = valid & LSUOpType.isSC(func)

  // funct3(0) selects the atomic access width: 1 -> ld/sd, 0 -> lw/sw.
  val funct3 = inst(14, 12)
  val atom_d = funct3(0)

  // Atom LR/SC Control Bits
  val lr      = WireInit(Bool(), false.B)
  val lr_addr = WireInit(UInt(XLEN.W), DontCare)
  // The reservation is updated when an LR or SC completes: LR sets the bit, SC clears it.
  io.memoryUnit.out.lr_wen   := io.memoryUnit.out.ready && (lr_req || sc_req)
  io.memoryUnit.out.lr_wbit  := lr_req
  io.memoryUnit.out.lr_waddr := src1
  lr                         := io.memoryUnit.in.lr
  lr_addr                    := io.memoryUnit.in.lr_addr

  val s_idle :: s_sc :: s_amo_a :: s_amo_s :: Nil = Enum(4)

  val state      = RegInit(s_idle)
  val atom_wdata = Reg(UInt(XLEN.W)) // loaded value, later overwritten with the AtomAlu result to store
  val atom_rdata = Reg(UInt(XLEN.W)) // loaded value returned to the pipeline for an AMO
  atomAlu.in.rdata := atom_wdata
  atomAlu.in.src2  := src2
  atomAlu.in.info  := io.memoryUnit.in.info

  // An SC fails when there is no reservation or the reserved address differs.
  val sc_invalid = (src1 =/= lr_addr || !lr) && sc_req

  // Defaults; every state below overrides what it needs.
  lsExecute.in.info        := DontCare
  lsExecute.in.mem_addr    := DontCare
  lsExecute.in.mem_en      := false.B
  lsExecute.in.wdata       := DontCare
  io.memoryUnit.out.ready  := false.B

  val allow_to_go             = io.memoryUnit.in.allow_to_go
  val complete_single_request = Wire(Bool())
  // This signal only takes effect for AMO operations.
  complete_single_request := false.B

  io.memoryUnit.out.complete_single_request := complete_single_request

  switch(state) {
    is(s_idle) { // 0
      // Plain load/store: address is base + immediate.
      lsExecute.in.mem_en     := io.memoryUnit.in.mem_en && !atom_req
      lsExecute.in.mem_addr   := src1 + imm
      lsExecute.in.info.op    := func
      lsExecute.in.wdata      := src2
      io.memoryUnit.out.ready := lsExecute.out.ready || sc_invalid
      when(amo_req) {
        // Load half of the AMO; the instruction is not finished yet, so ready stays low.
        lsExecute.in.mem_en     := true.B
        lsExecute.in.mem_addr   := src1
        lsExecute.in.info.op    := Mux(atom_d, LSUOpType.ld, LSUOpType.lw)
        lsExecute.in.wdata      := DontCare
        io.memoryUnit.out.ready := false.B
        when(lsExecute.out.ready) {
          state := s_amo_a
          // Tell the dcache that one memory access has completed so the next one can start.
          complete_single_request := true.B
        }
        atom_wdata := lsExecute.out.rdata
        atom_rdata := lsExecute.out.rdata
      }
      when(lr_req) {
        lsExecute.in.mem_en     := true.B
        lsExecute.in.mem_addr   := src1
        lsExecute.in.info.op    := Mux(atom_d, LSUOpType.ld, LSUOpType.lw)
        lsExecute.in.wdata      := DontCare
        io.memoryUnit.out.ready := lsExecute.out.ready
      }
      // A failing SC completes in idle (ready via sc_invalid); a valid one performs the store in s_sc.
      when(sc_req) { state := Mux(sc_invalid, s_idle, s_sc) }
    }

    is(s_sc) { // 1
      lsExecute.in.mem_en     := true.B
      lsExecute.in.mem_addr   := src1
      lsExecute.in.info.op    := Mux(atom_d, LSUOpType.sd, LSUOpType.sw)
      lsExecute.in.wdata      := src2
      io.memoryUnit.out.ready := lsExecute.out.ready
      when(allow_to_go) {
        state := s_idle
      }
    }

    is(s_amo_a) { // 2
      // No memory access this cycle; just capture the AtomAlu result for the store.
      lsExecute.in.mem_en     := false.B
      lsExecute.in.mem_addr   := DontCare
      lsExecute.in.info.op    := DontCare
      lsExecute.in.wdata      := DontCare
      io.memoryUnit.out.ready := false.B
      state                   := s_amo_s
      atom_wdata              := atomAlu.out.result
    }

    is(s_amo_s) { // 3
      lsExecute.in.mem_en     := true.B
      lsExecute.in.mem_addr   := src1
      lsExecute.in.info.op    := Mux(atom_d, LSUOpType.sd, LSUOpType.sw)
      lsExecute.in.wdata      := atom_wdata
      io.memoryUnit.out.ready := lsExecute.out.ready
      when(allow_to_go) {
        state := s_idle
      }
    }
  }

  // Any fault aborts the current sequence (last connect wins over the switch above).
  when(
    lsExecute.out.addr_misaligned ||
      lsExecute.out.access_fault ||
      lsExecute.out.page_fault
  ) {
    state                   := s_idle
    io.memoryUnit.out.ready := true.B
    complete_single_request := false.B // On an exception, progress should be controlled by ctrl's allow_to_go.
  }

  io.dataMemory <> lsExecute.dataMemory

  // Exception reporting: loads and LR raise load faults; stores, SC and AMO raise store/AMO faults.
  io.memoryUnit.out.ex                                := io.memoryUnit.in.ex
  io.memoryUnit.out.ex.exception(loadAddrMisaligned)  := (load_req || lr_req) && lsExecute.out.addr_misaligned
  io.memoryUnit.out.ex.exception(loadAccessFault)     := (load_req || lr_req) && lsExecute.out.access_fault
  io.memoryUnit.out.ex.exception(loadPageFault)       := (load_req || lr_req) && lsExecute.out.page_fault
  io.memoryUnit.out.ex.exception(storeAddrMisaligned) := (store_req || sc_req || amo_req) && lsExecute.out.addr_misaligned
  // Fixed: this was previously driven by addr_misaligned, so access faults on stores were never reported.
  io.memoryUnit.out.ex.exception(storeAccessFault)    := (store_req || sc_req || amo_req) && lsExecute.out.access_fault
  io.memoryUnit.out.ex.exception(storePageFault)      := (store_req || sc_req || amo_req) && lsExecute.out.page_fault

  // The trap value for every memory exception is the address presented to data memory.
  io.memoryUnit.out.ex.tval(loadAddrMisaligned)  := io.dataMemory.out.addr
  io.memoryUnit.out.ex.tval(loadAccessFault)     := io.dataMemory.out.addr
  io.memoryUnit.out.ex.tval(loadPageFault)       := io.dataMemory.out.addr
  io.memoryUnit.out.ex.tval(storeAddrMisaligned) := io.dataMemory.out.addr
  io.memoryUnit.out.ex.tval(storeAccessFault)    := io.dataMemory.out.addr
  io.memoryUnit.out.ex.tval(storePageFault)      := io.dataMemory.out.addr

  // Result: SC returns its failure flag (0 = success), AMO returns the value
  // originally loaded, everything else returns the load data.
  io.memoryUnit.out.rdata := MuxCase(
    lsExecute.out.rdata,
    Seq(
      (sc_req)  -> sc_invalid,
      (amo_req) -> atom_rdata
    )
  )
}
|
|
@ -1,31 +0,0 @@
|
|||
package cpu.pipeline.memory
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines._
|
||||
import cpu.defines.Const._
|
||||
|
||||
/** Handles instructions dispatched to `FuType.mou` (FENCE.I and SFENCE.VMA).
  *
  * Every valid MOU instruction requests a pipeline flush and redirects fetch
  * to the next sequential instruction (`pc + 4`). The two one-hot style
  * outputs tell the rest of the core which operation was decoded.
  */
class Mou extends Module {
  val io = IO(new Bundle {
    val in = Input(new Bundle {
      val info = new Info()
      val pc   = UInt(XLEN.W)
    })
    val out = Output(new Bundle {
      val flush      = Bool()
      val fence_i    = Bool()
      val sfence_vma = Bool()
      val target     = UInt(XLEN.W)
    })
  })

  // Local alias for the decoded instruction info.
  private val info = io.in.info

  // The instruction is ours only when it is valid and routed to the MOU.
  val valid      = info.valid && (info.fusel === FuType.mou)
  val fence_i    = valid && (info.op === MOUOpType.fencei)
  val sfence_vma = valid && (info.op === MOUOpType.sfence_vma)

  // Redirect target is always the instruction following this one.
  io.out.target     := io.in.pc + 4.U
  io.out.sfence_vma := sfence_vma
  io.out.fence_i    := fence_i
  io.out.flush      := valid
}
|
|
@ -1,48 +0,0 @@
|
|||
package cpu.pipeline.memory
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines._
|
||||
import cpu.defines.Const._
|
||||
import cpu.CpuConfig
|
||||
|
||||
/** Combinational ALU for the read-modify-write step of AMO instructions.
  *
  * Inputs are the value loaded from memory (`rdata`) and the register operand
  * (`src2`); the output is the value to be stored back. The operation is
  * selected by `info.op`, and bit 12 of the instruction (funct3(0)) selects
  * between a word operation (0) and a full-width operation (1).
  */
class AtomAlu extends Module {
  val io = IO(new Bundle {
    val in = Input(new Bundle {
      val rdata = Input(UInt(XLEN.W)) // load data
      val src2  = Input(UInt(XLEN.W)) // reg data
      val info  = new Info()
    })
    val out = Output(new Bundle {
      val result = Output(UInt(XLEN.W))
    })
  })

  val src1 = io.in.rdata
  val src2 = io.in.src2
  val op   = io.in.info.op
  // Everything except an add uses the adder as a subtractor (for the comparisons).
  val is_sub = !LSUOpType.isAdd(op)
  // XLEN+1 bit result: src1 + src2, or src1 + ~src2 + 1 when subtracting.
  val sum = (src1 +& (src2 ^ Fill(XLEN, is_sub))) + is_sub
  val oxr = src1 ^ src2
  // No carry out of the subtraction means src1 < src2 (unsigned).
  val sltu = !sum(XLEN)
  // Signed less-than: same as unsigned unless the operand signs differ.
  val slt     = oxr(XLEN - 1) ^ sltu
  val is_word = !io.in.info.inst(12)

  val res = LookupTreeDefault(
    op(5, 0),
    sum,
    List(
      LSUOpType.amoswap -> src2,
      LSUOpType.amoadd  -> sum,
      LSUOpType.amoxor  -> oxr,
      LSUOpType.amoand  -> (src1 & src2),
      LSUOpType.amoor   -> (src1 | src2),
      LSUOpType.amomin  -> Mux(slt(0), src1, src2),
      LSUOpType.amomax  -> Mux(slt(0), src2, src1),
      LSUOpType.amominu -> Mux(sltu(0), src1, src2),
      LSUOpType.amomaxu -> Mux(sltu(0), src2, src1)
    )
  )

  // Word results are sign-extended to the register width. XLEN is used instead
  // of a hard-coded 64 so the expression matches the XLEN-wide output port.
  io.out.result := Mux(is_word, SignedExtend(res(31, 0), XLEN), res(XLEN - 1, 0))
}
|
|
@ -1,143 +0,0 @@
|
|||
package cpu.pipeline.memory
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines._
|
||||
import cpu.defines.Const._
|
||||
import cpu.CpuConfig
|
||||
|
||||
/** Performs a single data-memory access on behalf of the LSU.
  *
  * Turns (address, op, write data) into a request on `dataMemory`: it builds
  * the byte strobe and replicated write data for stores, and for loads shifts
  * the returned data down by the byte offset and sign/zero-extends it
  * according to the load op. It also checks natural alignment and forwards
  * the access/page fault flags from memory, all gated by `mem_en`.
  */
class LsExecute extends Module {
  val io = IO(new Bundle {
    val dataMemory = new Lsu_DataMemory()
    val in = Input(new Bundle {
      val mem_en   = Bool()
      val mem_addr = UInt(XLEN.W)
      val wdata    = UInt(XLEN.W)
      val info     = new Info()
    })
    val out = Output(new Bundle {
      val addr_misaligned = Bool()
      val access_fault    = Bool()
      val page_fault      = Bool()
      val rdata           = UInt(XLEN.W)
      val ready           = Bool()
    })
  })

  /** Byte strobe for a 64-bit bus: a size-dependent run of ones shifted to the byte offset `addr(2, 0)`. */
  def genWmask(addr: UInt, sizeEncode: UInt): UInt = {
    val baseMask = LookupTree(
      sizeEncode,
      List(
        "b00".U -> 0x1.U, // byte:   0001
        "b01".U -> 0x3.U, // half:   0011
        "b10".U -> 0xf.U, // word:   1111
        "b11".U -> 0xff.U // double: 11111111
      )
    )
    baseMask << addr(2, 0)
  }

  /** Write data for a 64-bit bus: the low byte/half/word of `data` replicated across all lanes. */
  def genWdata(data: UInt, sizeEncode: UInt): UInt = {
    LookupTree(
      sizeEncode,
      List(
        "b00".U -> Fill(8, data(7, 0)),
        "b01".U -> Fill(4, data(15, 0)),
        "b10".U -> Fill(2, data(31, 0)),
        "b11".U -> data
      )
    )
  }

  /** Byte strobe for a 32-bit bus: a size-dependent run of ones shifted to the byte offset `addr(1, 0)`. */
  def genWmask32(addr: UInt, sizeEncode: UInt): UInt = {
    val baseMask = LookupTree(
      sizeEncode,
      List(
        "b00".U -> 0x1.U, // byte: 0001
        "b01".U -> 0x3.U, // half: 0011
        "b10".U -> 0xf.U  // word: 1111
      )
    )
    baseMask << addr(1, 0)
  }

  /** Write data for a 32-bit bus: the low byte/half of `data` replicated across all lanes. */
  def genWdata32(data: UInt, sizeEncode: UInt): UInt = {
    LookupTree(
      sizeEncode,
      List(
        "b00".U -> Fill(4, data(7, 0)),
        "b01".U -> Fill(2, data(15, 0)),
        "b10".U -> data
      )
    )
  }

  val valid = io.in.mem_en
  val addr  = io.in.mem_addr
  val op    = io.in.info.op

  val is_store = valid && LSUOpType.isStore(op)
  // Anything that is not a store and not a full-width `ld` needs extension of the loaded data.
  val partial_load = !is_store && (op =/= LSUOpType.ld)

  // The low two bits of the op encode the access size (b/h/w/d).
  val size = op(1, 0)
  // Elaboration-time selection between the 32-bit and 64-bit data paths.
  val req_addr  = if (XLEN == 32) SignedExtend(addr, XLEN) else addr
  val req_wdata = if (XLEN == 32) genWdata32(io.in.wdata, size) else genWdata(io.in.wdata, size)
  val req_wmask = if (XLEN == 32) genWmask32(addr, size) else genWmask(addr, size)

  val rdata        = io.dataMemory.in.rdata
  val access_fault = io.dataMemory.in.access_fault
  val page_fault   = io.dataMemory.in.page_fault

  // Shift the returned bus data down so the addressed byte sits at bit 0.
  // NOTE(review): both rdata64 and rdata32 are elaborated regardless of XLEN;
  // this presumably relies on `rdata` being 64 bits wide — confirm.
  val rdata64 = LookupTree(
    addr(2, 0),
    List(
      "b000".U -> rdata(63, 0),
      "b001".U -> rdata(63, 8),
      "b010".U -> rdata(63, 16),
      "b011".U -> rdata(63, 24),
      "b100".U -> rdata(63, 32),
      "b101".U -> rdata(63, 40),
      "b110".U -> rdata(63, 48),
      "b111".U -> rdata(63, 56)
    )
  )
  val rdata32 = LookupTree(
    addr(1, 0),
    List(
      "b00".U -> rdata(31, 0),
      "b01".U -> rdata(31, 8),
      "b10".U -> rdata(31, 16),
      "b11".U -> rdata(31, 24)
    )
  )
  val rdata_result = if (XLEN == 32) rdata32 else rdata64

  // Sign- or zero-extend sub-register loads to the register width.
  val rdata_partial_result = LookupTree(
    op,
    List(
      LSUOpType.lb  -> SignedExtend(rdata_result(7, 0), XLEN),
      LSUOpType.lh  -> SignedExtend(rdata_result(15, 0), XLEN),
      LSUOpType.lw  -> SignedExtend(rdata_result(31, 0), XLEN),
      LSUOpType.lbu -> ZeroExtend(rdata_result(7, 0), XLEN),
      LSUOpType.lhu -> ZeroExtend(rdata_result(15, 0), XLEN),
      LSUOpType.lwu -> ZeroExtend(rdata_result(31, 0), XLEN)
    )
  )

  // Natural alignment check, keyed by access size.
  val addr_aligned = LookupTree(
    op(1, 0),
    List(
      "b00".U -> true.B,                // byte
      "b01".U -> (addr(0) === 0.U),     // half
      "b10".U -> (addr(1, 0) === 0.U),  // word
      "b11".U -> (addr(2, 0) === 0.U)   // double
    )
  )

  // Status outputs, all qualified by the request being enabled.
  io.out.addr_misaligned := valid && !addr_aligned
  io.out.access_fault    := valid && access_fault
  io.out.page_fault      := valid && page_fault

  // Memory request; a misaligned access is never sent to memory.
  io.dataMemory.out.en    := valid && !io.out.addr_misaligned
  io.dataMemory.out.rlen  := size
  io.dataMemory.out.wen   := is_store
  io.dataMemory.out.wstrb := req_wmask
  io.dataMemory.out.addr  := req_addr
  io.dataMemory.out.wdata := req_wdata

  // Done only when memory reports ready for a request that was actually issued.
  io.out.ready := io.dataMemory.in.ready && io.dataMemory.out.en
  io.out.rdata := Mux(partial_load, rdata_partial_result, rdata_result)
}
|
|
@ -1,80 +0,0 @@
|
|||
package cpu.pipeline.writeback
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import cpu.defines.DEBUG
|
||||
|
||||
/** Two-in / one-out FIFO of [[DEBUG]] commit records.
  *
  * An entry counts as present when its `wb_rf_wen` field is non-zero: that is
  * what triggers an enqueue on either input port and a dequeue of the head.
  * Up to two records can be written per cycle (port 0 first, port 1 directly
  * after it) and at most one is removed.
  *
  * NOTE(review): pointers are `log2Ceil(depth)` bits wide and wrap by
  * overflow, which presumably assumes `depth` is a power of two — confirm.
  *
  * @param depth
  *   Number of entries in the buffer.
  */
class CommitBuffer(
  depth: Int = 128)
    extends Module {
  val io = IO(new Bundle {
    val flush = Input(Bool())
    val enq   = Flipped(Vec(2, new DEBUG()))
    val deq   = new DEBUG()
  })

  private val ptrWidth = log2Ceil(depth)

  val ram        = RegInit(VecInit(Seq.fill(depth)(0.U.asTypeOf(new DEBUG()))))
  val enq_ptr    = RegInit(0.U(ptrWidth.W))
  val deq_ptr    = RegInit(0.U(ptrWidth.W))
  val maybe_full = RegInit(false.B)
  val ptr_match  = enq_ptr === deq_ptr
  val empty      = ptr_match && !maybe_full
  val full       = ptr_match && maybe_full
  val do_enq     = Wire(Vec(2, Bool()))
  // Dequeue whenever the head record is marked valid; overridden below when empty.
  val do_deq = WireDefault(io.deq.wb_rf_wen.orR)

  // A port enqueues when its record carries a non-zero write-enable.
  do_enq.zip(io.enq).foreach { case (en, port) =>
    en := port.wb_rf_wen.orR
  }

  // Priority: flush, then two enqueues, then one enqueue, else hold.
  val next_enq_ptr = Mux(
    io.flush,
    0.U,
    Mux(
      do_enq(0) && do_enq(1),
      enq_ptr + 2.U,
      Mux(do_enq(0) || do_enq(1), enq_ptr + 1.U, enq_ptr)
    )
  )

  when(do_enq(0)) {
    ram(enq_ptr) := io.enq(0)
  }

  // Port 1 lands in the slot after port 0 when both enqueue in the same cycle.
  val enq1_ptr = Mux(do_enq(0), enq_ptr + 1.U, enq_ptr)
  when(do_enq(1)) {
    ram(enq1_ptr) := io.enq(1)
  }

  val next_deq_ptr = Mux(do_deq, deq_ptr + 1.U, deq_ptr)

  when(do_enq(0) =/= do_deq) {
    maybe_full := do_enq(0)
  }

  // Inside this branch do_enq(1) is high, so this sets the flag.
  when(do_enq(1)) {
    maybe_full := true.B
  }

  when(io.flush) {
    enq_ptr    := 0.U
    deq_ptr    := 0.U
    maybe_full := false.B
  }.otherwise {
    enq_ptr := next_enq_ptr
    deq_ptr := next_deq_ptr
  }

  // Invalidate the slot that has just been read out (placed after the enqueue
  // writes so that this connection takes precedence on the same slot).
  when(do_deq) {
    ram(deq_ptr).wb_rf_wen := 0.U
  }

  when(empty) {
    // Nothing to output: suppress the dequeue and present an invalid record.
    do_deq           := false.B
    io.deq           := DontCare
    io.deq.wb_rf_wen := 0.U
  }.otherwise {
    io.deq := ram(deq_ptr)
  }
}
|
Loading…
Reference in New Issue