diff --git a/chisel/playground/src/pipeline/execute/ExecuteUnit.scala b/chisel/playground/src/pipeline/execute/ExecuteUnit.scala
index 301c609..df2d088 100644
--- a/chisel/playground/src/pipeline/execute/ExecuteUnit.scala
+++ b/chisel/playground/src/pipeline/execute/ExecuteUnit.scala
@@ -7,114 +7,57 @@ import cpu.defines._
 import cpu.defines.Const._
 import cpu.pipeline.decode.RegWrite
 import cpu.pipeline.memory.ExecuteUnitMemoryUnit
-import cpu.pipeline.fetch.ExecuteUnitBranchPredictor
 
-class ExecuteUnit(implicit val cpuConfig: CpuConfig) extends Module {
+/** Execute stage (single-instruction version).
+  *
+  * Feeds the instruction arriving on `io.executeStage` into `Fu`, reports load
+  * and destination-register information plus the redirect request
+  * (`flush`/`target`) on `io.ctrl`, forwards the execute-stage result to the
+  * decode unit and hands the instruction on to the memory stage.
+  *
+  * NOTE(review): `io.dataSram` is declared below but nothing in this class
+  * connects it; confirm where that port is supposed to be driven.
+  */
+class ExecuteUnit extends Module {
   val io = IO(new Bundle {
     val ctrl = new ExecuteCtrl()
     val executeStage = Input(new DecodeUnitExecuteUnit())
-    val csr = Flipped(new CsrExecuteUnit())
-    val bpu = new ExecuteUnitBranchPredictor()
-    val fetchUnit = Output(new Bundle {
-      val flush = Bool()
-      val target = UInt(XLEN.W)
-    })
     val decodeUnit = new Bundle {
       val forward = Output(
-        Vec(
-          cpuConfig.commitNum,
-          new Bundle {
-            val exe = new RegWrite()
-            val is_load = Bool()
-          }
-        )
+        new Bundle {
+          val exe = new RegWrite()
+          val is_load = Bool()
+        }
       )
     }
     val memoryStage = Output(new ExecuteUnitMemoryUnit())
-    val dataMemory = new Bundle {
-      val addr = Output(UInt(XLEN.W))
-    }
+    val dataSram = new DataSram()
   })
 
-  val valid = io.executeStage.inst.map(_.info.valid && io.ctrl.allow_to_go)
-  val fusel = io.executeStage.inst.map(_.info.fusel)
+  val valid = io.executeStage.data.info.valid && io.ctrl.ctrlSignal.allow_to_go
+  val fusel = io.executeStage.data.info.fusel
 
-  io.ctrl.flush := io.fetchUnit.flush
-  for (i <- 0 until (cpuConfig.commitNum)) {
-    io.ctrl.inst(i).is_load :=
-      io.executeStage.inst(i).info.fusel === FuType.lsu && io.executeStage.inst(i).info.reg_wen
-    io.ctrl.inst(i).reg_waddr := io.executeStage.inst(i).info.reg_waddr
-  }
-
-  val is_csr = VecInit(
-    Seq.tabulate(cpuConfig.commitNum)(i =>
-      fusel(i) === FuType.csr && valid(i) && !(HasExcInt(io.executeStage.inst(i).ex))
-    )
-  )
-
-  io.csr.in.valid := is_csr.asUInt.orR
-
-  def selectInstField[T <: Data](select: Vec[Bool], fields: Seq[T]): T = {
-    require(select.length == fields.length)
-    Mux1H(select.zip(fields))
-  }
-
-  io.csr.in.pc := selectInstField(is_csr, io.executeStage.inst.map(_.pc))
-  io.csr.in.info := selectInstField(is_csr, io.executeStage.inst.map(_.info))
-  io.csr.in.src_info := selectInstField(is_csr, io.executeStage.inst.map(_.src_info))
-  io.csr.in.ex := selectInstField(is_csr, io.executeStage.inst.map(_.ex))
+  io.ctrl.data.is_load := fusel === FuType.lsu && LSUOpType.isLoad(io.executeStage.data.info.op)
+  io.ctrl.data.reg_waddr := io.executeStage.data.info.reg_waddr
 
   val fu = Module(new Fu()).io
-  fu.ctrl <> io.ctrl.fu
-  for (i <- 0 until (cpuConfig.commitNum)) {
-    fu.inst(i).pc := io.executeStage.inst(i).pc
-    fu.inst(i).info := io.executeStage.inst(i).info
-    fu.inst(i).src_info := io.executeStage.inst(i).src_info
-  }
-  fu.branch.pred_branch := io.executeStage.jump_branch_info.pred_branch
-  fu.branch.jump_regiser := io.executeStage.jump_branch_info.jump_regiser
-  fu.branch.branch_target := io.executeStage.jump_branch_info.branch_target
+  fu.data.pc := io.executeStage.data.pc
+  fu.data.info := io.executeStage.data.info
+  fu.data.src_info := io.executeStage.data.src_info
+
+  // These read `fu`, so they must come after `val fu`: class-body statements run
+  // in source order and an earlier reference would still see `fu` as null.
+  io.ctrl.flush := valid && fu.ctrl.flush
+  io.ctrl.target := fu.ctrl.target
 
-  io.dataMemory.addr := fu.dataMemory.addr
+  io.memoryStage.data.pc := io.executeStage.data.pc
+  io.memoryStage.data.info := io.executeStage.data.info
+  io.memoryStage.data.src_info := io.executeStage.data.src_info
+  io.memoryStage.data.rd_info := fu.data.rd_info
 
-  io.bpu.pc := io.executeStage.inst(0).pc
-  io.bpu.update_pht_index := io.executeStage.jump_branch_info.update_pht_index
-  io.bpu.branch := fu.branch.branch
-  io.bpu.branch_inst := io.executeStage.jump_branch_info.branch_inst
-
-  io.fetchUnit.flush := valid(0) && io.ctrl.allow_to_go && (fu.branch.flush || io.csr.out.flush)
-  io.fetchUnit.target := Mux(io.csr.out.flush, io.csr.out.target, fu.branch.target)
-
-  for (i <- 0 until (cpuConfig.commitNum)) {
-    io.memoryStage.inst(i).pc := io.executeStage.inst(i).pc
-    io.memoryStage.inst(i).info := io.executeStage.inst(i).info
-    io.memoryStage.inst(i).src_info := io.executeStage.inst(i).src_info
-    io.memoryStage.inst(i).rd_info.wdata := DontCare
-    io.memoryStage.inst(i).rd_info.wdata(FuType.alu) := fu.inst(i).result.alu
-    io.memoryStage.inst(i).rd_info.wdata(FuType.bru) := io.executeStage.inst(i).pc + 4.U
-    io.memoryStage.inst(i).rd_info.wdata(FuType.mdu) := fu.inst(i).result.mdu
-    io.memoryStage.inst(i).rd_info.wdata(FuType.csr) := io.csr.out.rdata
-    io.memoryStage.inst(i).ex := Mux(
-      (HasExcInt(io.executeStage.inst(i).ex)) && io.executeStage.inst(i).info.valid,
-      io.executeStage.inst(i).ex,
-      MuxLookup(io.executeStage.inst(i).info.fusel, io.executeStage.inst(i).ex)(
-        Seq(
-          FuType.csr -> io.csr.out.ex
-        )
-      )
-    )
-    io.memoryStage.inst(i).ex.exception(instAddrMisaligned) :=
-      io.executeStage.inst(i).ex.exception(instAddrMisaligned) ||
-      io.fetchUnit.flush && io.fetchUnit.target(log2Ceil(INST_WID / 8) - 1, 0).orR
-    io.memoryStage.inst(i).ex.tval(instAddrMisaligned) := Mux(
-      io.executeStage.inst(i).ex.exception(instAddrMisaligned),
-      io.executeStage.inst(i).ex.tval(instAddrMisaligned),
-      io.fetchUnit.target
-    )
-
-    io.decodeUnit.forward(i).exe.wen := io.memoryStage.inst(i).info.reg_wen
-    io.decodeUnit.forward(i).exe.waddr := io.memoryStage.inst(i).info.reg_waddr
-    io.decodeUnit.forward(i).exe.wdata := io.memoryStage.inst(i).rd_info.wdata(io.memoryStage.inst(i).info.fusel)
-    io.decodeUnit.forward(i).is_load := io.ctrl.inst(i).is_load
-  }
+  // Data forwarding: expose this stage's register write to the decode unit.
+  io.decodeUnit.forward.exe.wen := io.memoryStage.data.info.reg_wen
+  io.decodeUnit.forward.exe.waddr := io.memoryStage.data.info.reg_waddr
+  io.decodeUnit.forward.exe.wdata := io.memoryStage.data.rd_info.wdata(io.memoryStage.data.info.fusel)
+  io.decodeUnit.forward.is_load := io.ctrl.data.is_load
 }
diff --git 
a/chisel/playground/src/pipeline/execute/fu/Lsu.scala b/chisel/playground/src/pipeline/execute/fu/Lsu.scala
new file mode 100644
index 0000000..a20ef9f
--- /dev/null
+++ b/chisel/playground/src/pipeline/execute/fu/Lsu.scala
@@ -0,0 +1,151 @@
+package cpu.pipeline.execute
+
+import chisel3._
+import chisel3.util._
+import cpu.defines._
+import cpu.defines.Const._
+import cpu.CpuConfig
+import chisel3.util.experimental.BoringUtils
+
+/** Load/store unit that drives the data SRAM port.
+  *
+  * The access address is `src1_data + imm` and `op(1, 0)` encodes the access
+  * size (byte, half, word, double). Stores drive a byte mask plus replicated
+  * write data; read data is shifted down to the addressed byte, extended as the
+  * load opcode requires and published through BoringUtils as "lsu_rdata".
+  *
+  * NOTE(review): the load result uses the `addr`/`op` present on the inputs in
+  * the same cycle as `io.dataSram.rdata`; if the SRAM answers a cycle later,
+  * confirm they still belong to the same instruction.
+  */
+class Lsu extends Module {
+  val io = IO(new Bundle {
+    val info = Input(new Info())
+    val src_info = Input(new SrcInfo())
+    val dataSram = new DataSram()
+  })
+
+  /** Byte mask for the 64-bit path: a size-wide run of ones shifted left by `addr(2, 0)`. */
+  def genWmask(addr: UInt, sizeEncode: UInt): UInt = {
+    LookupTree(
+      sizeEncode,
+      List(
+        "b00".U -> 0x1.U, //0001 << addr(2:0)
+        "b01".U -> 0x3.U, //0011
+        "b10".U -> 0xf.U, //1111
+        "b11".U -> 0xff.U //11111111
+      )
+    ) << addr(2, 0)
+  }
+
+  /** Write data for the 64-bit path: the low byte/half/word is replicated to fill 64 bits. */
+  def genWdata(data: UInt, sizeEncode: UInt): UInt = {
+    LookupTree(
+      sizeEncode,
+      List(
+        "b00".U -> Fill(8, data(7, 0)),
+        "b01".U -> Fill(4, data(15, 0)),
+        "b10".U -> Fill(2, data(31, 0)),
+        "b11".U -> data
+      )
+    )
+  }
+
+  /** Byte mask for the 32-bit path: a size-wide run of ones shifted left by `addr(1, 0)`. */
+  def genWmask32(addr: UInt, sizeEncode: UInt): UInt = {
+    LookupTree(
+      sizeEncode,
+      List(
+        "b00".U -> 0x1.U, //0001 << addr(1:0)
+        "b01".U -> 0x3.U, //0011
+        "b10".U -> 0xf.U //1111
+      )
+    ) << addr(1, 0)
+  }
+
+  /** Write data for the 32-bit path: the low byte/half is replicated to fill 32 bits. */
+  def genWdata32(data: UInt, sizeEncode: UInt): UInt = {
+    LookupTree(
+      sizeEncode,
+      List(
+        "b00".U -> Fill(4, data(7, 0)),
+        "b01".U -> Fill(2, data(15, 0)),
+        "b10".U -> data
+      )
+    )
+  }
+
+  val valid = io.info.valid && io.info.fusel === FuType.lsu // "&& no exception": noted by the author, not applied here
+  val op = io.info.op
+  val is_load = valid && LSUOpType.isLoad(op) // not referenced anywhere else in this module
+  val is_store = valid && LSUOpType.isStore(op)
+  val addr = io.src_info.src1_data + io.info.imm
+  val wdata = io.src_info.src2_data
+  val partial_load = !is_store && (op =/= LSUOpType.ld)
+  val size = op(1, 0)
+  val req_addr = if (XLEN == 32) SignedExtend(addr, XLEN) else addr
+  val req_wdata = if (XLEN == 32) genWdata32(wdata, size) else genWdata(wdata, size)
+  val req_wmask = if (XLEN == 32) genWmask32(addr, size) else genWmask(addr, size)
+  val rdata = io.dataSram.rdata
+
+  // Only the byte-lane selector matching the configured XLEN is elaborated, so a
+  // 32-bit build never extracts bits above 31 from `rdata`.
+  val rdata_result =
+    if (XLEN == 32) {
+      LookupTree(
+        addr(1, 0),
+        List(
+          "b00".U -> rdata(31, 0),
+          "b01".U -> rdata(31, 8),
+          "b10".U -> rdata(31, 16),
+          "b11".U -> rdata(31, 24)
+        )
+      )
+    } else {
+      LookupTree(
+        addr(2, 0),
+        List(
+          "b000".U -> rdata(63, 0),
+          "b001".U -> rdata(63, 8),
+          "b010".U -> rdata(63, 16),
+          "b011".U -> rdata(63, 24),
+          "b100".U -> rdata(63, 32),
+          "b101".U -> rdata(63, 40),
+          "b110".U -> rdata(63, 48),
+          "b111".U -> rdata(63, 56)
+        )
+      )
+    }
+  // Narrow loads: sign- or zero-extend the selected byte/half/word to XLEN.
+  val rdata_partial_result = LookupTree(
+    op,
+    List(
+      LSUOpType.lb -> SignedExtend(rdata_result(7, 0), XLEN),
+      LSUOpType.lh -> SignedExtend(rdata_result(15, 0), XLEN),
+      LSUOpType.lw -> SignedExtend(rdata_result(31, 0), XLEN),
+      LSUOpType.lbu -> ZeroExtend(rdata_result(7, 0), XLEN),
+      LSUOpType.lhu -> ZeroExtend(rdata_result(15, 0), XLEN),
+      LSUOpType.lwu -> ZeroExtend(rdata_result(31, 0), XLEN)
+    )
+  )
+  // An access is aligned when the address bits below its size are all zero.
+  val addr_aligned = LookupTree(
+    op(1, 0),
+    List(
+      "b00".U -> true.B, //b
+      "b01".U -> (addr(0) === 0.U), //h
+      "b10".U -> (addr(1, 0) === 0.U), //w
+      "b11".U -> (addr(2, 0) === 0.U) //d
+    )
+  )
+
+  // Misaligned accesses never enable the SRAM; write-enable bits are set only for stores.
+  io.dataSram.en := valid && addr_aligned
+  io.dataSram.wen := req_wmask & Fill(8, is_store)
+  io.dataSram.addr := req_addr
+  io.dataSram.wdata := req_wdata
+
+  // Use the extended value unless this is a store or `ld`.
+  val result = Mux(partial_load, rdata_partial_result, rdata_result)
+  BoringUtils.addSource(result, "lsu_rdata")
+}
diff --git a/chisel/playground/src/pipeline/memory/MemoryUnit.scala b/chisel/playground/src/pipeline/memory/MemoryUnit.scala
index f66f02b..33a7ba5 100644
--- a/chisel/playground/src/pipeline/memory/MemoryUnit.scala
+++ b/chisel/playground/src/pipeline/memory/MemoryUnit.scala
@@ -8,8 +8,9 @@ import cpu.CpuConfig
 import cpu.pipeline.decode.RegWrite
 import cpu.pipeline.execute.CsrMemoryUnit
 import cpu.pipeline.writeback.MemoryUnitWriteBackUnit
+import chisel3.util.experimental.BoringUtils
 
-class 
MemoryUnit(implicit val cpuConfig: CpuConfig) extends Module {
+class MemoryUnit extends Module {
   val io = IO(new Bundle {
     val ctrl = new MemoryCtrl()
     val memoryStage = Input(new ExecuteUnitMemoryUnit())
@@ -17,89 +18,30 @@ class MemoryUnit(implicit val cpuConfig: CpuConfig) extends Module {
       val flush = Bool()
       val target = UInt(XLEN.W)
     })
-    val decodeUnit = Output(Vec(cpuConfig.commitNum, new RegWrite()))
-    val csr = Flipped(new CsrMemoryUnit())
+    val decodeUnit = Output(new RegWrite())
     val writeBackStage = Output(new MemoryUnitWriteBackUnit())
-    val dataMemory = new Lsu_DataMemory()
+    val dataSram = new DataSram()
   })
 
-  val lsu = Module(new Lsu()).io
-  val mou = Module(new Mou()).io
+  // Value published elsewhere under the BoringUtils name "lsu_rdata"; used as the LSU write-back data below.
+  val rdata = Wire(UInt(XLEN.W))
+  BoringUtils.addSink(rdata, "lsu_rdata")
 
-  mou.in.info := io.memoryStage.inst(0).info
-  mou.in.pc := io.memoryStage.inst(0).pc
+  // Forward this stage's register write (as sent to write-back) to the decode unit.
+  io.decodeUnit.wen := io.writeBackStage.data.info.reg_wen
+  io.decodeUnit.waddr := io.writeBackStage.data.info.reg_waddr
+  io.decodeUnit.wdata := io.writeBackStage.data.rd_info.wdata(io.writeBackStage.data.info.fusel)
 
-  def selectInstField[T <: Data](select: Vec[Bool], fields: Seq[T]): T = {
-    require(select.length == fields.length)
-    Mux1H(select.zip(fields))
-  }
-
-  val lsu_sel = VecInit(
-    io.memoryStage.inst(0).info.valid &&
-      io.memoryStage.inst(0).info.fusel === FuType.lsu &&
-      !HasExcInt(io.memoryStage.inst(0).ex),
-    io.memoryStage.inst(1).info.valid &&
-      io.memoryStage.inst(1).info.fusel === FuType.lsu &&
-      !HasExcInt(io.memoryStage.inst(1).ex) && !HasExcInt(io.memoryStage.inst(0).ex) // 要保证指令0无异常
-  )
-  lsu.memoryUnit.in.mem_en := lsu_sel.reduce(_ || _)
-  lsu.memoryUnit.in.info := selectInstField(lsu_sel, io.memoryStage.inst.map(_.info))
-  lsu.memoryUnit.in.src_info := selectInstField(lsu_sel, io.memoryStage.inst.map(_.src_info))
-  lsu.memoryUnit.in.ex := selectInstField(lsu_sel, io.memoryStage.inst.map(_.ex))
-  lsu.dataMemory <> io.dataMemory
-  lsu.memoryUnit.in.allow_to_go := io.ctrl.allow_to_go
-
-  val csr_sel =
-    HasExcInt(io.writeBackStage.inst(0).ex) || !HasExcInt(io.writeBackStage.inst(1).ex)
-
-  io.csr.in.pc := 0.U
-  io.csr.in.ex := 0.U.asTypeOf(new ExceptionInfo())
-  io.csr.in.info := 0.U.asTypeOf(new Info())
-
-  def selectInstField[T <: Data](select: Bool, fields: Seq[T]): T = {
-    Mux1H(Seq(select -> fields(0), !select -> fields(1)))
-  }
-
-  when(io.ctrl.allow_to_go) {
-    io.csr.in.pc := selectInstField(csr_sel, io.memoryStage.inst.map(_.pc))
-    io.csr.in.ex := selectInstField(csr_sel, io.writeBackStage.inst.map(_.ex))
-    io.csr.in.info := selectInstField(csr_sel, io.memoryStage.inst.map(_.info))
-  }
-
-  io.csr.in.lr_wen := lsu.memoryUnit.out.lr_wen && io.ctrl.allow_to_go
-  io.csr.in.lr_wbit := lsu.memoryUnit.out.lr_wbit
-  io.csr.in.lr_waddr := lsu.memoryUnit.out.lr_waddr
-  lsu.memoryUnit.in.lr := io.csr.out.lr
-  lsu.memoryUnit.in.lr_addr := io.csr.out.lr_addr
-
-  for (i <- 0 until cpuConfig.commitNum) {
-    io.decodeUnit(i).wen := io.writeBackStage.inst(i).info.reg_wen
-    io.decodeUnit(i).waddr := io.writeBackStage.inst(i).info.reg_waddr
-    io.decodeUnit(i).wdata := io.writeBackStage.inst(i).rd_info.wdata(io.writeBackStage.inst(i).info.fusel)
-
-    io.writeBackStage.inst(i).pc := io.memoryStage.inst(i).pc
-    io.writeBackStage.inst(i).info := io.memoryStage.inst(i).info
-    io.writeBackStage.inst(i).rd_info.wdata := io.memoryStage.inst(i).rd_info.wdata
-    io.writeBackStage.inst(i).rd_info.wdata(FuType.lsu) := lsu.memoryUnit.out.rdata
-    io.writeBackStage.inst(i).ex := Mux(
-      lsu_sel(i),
-      lsu.memoryUnit.out.ex,
-      io.memoryStage.inst(i).ex
-    )
-  }
-
-  io.writeBackStage.inst(1).info.valid := io.memoryStage.inst(1).info.valid &&
-    !(io.fetchUnit.flush && csr_sel) // 指令0导致flush时,不应该提交指令1
-
-  io.ctrl.flush := io.fetchUnit.flush
-  io.ctrl.mem_stall := !lsu.memoryUnit.out.ready && lsu.memoryUnit.in.mem_en
-
-  io.ctrl.fence_i := mou.out.fence_i
-  io.ctrl.complete_single_request := lsu.memoryUnit.out.complete_single_request
-
-  io.ctrl.sfence_vma.valid := mou.out.sfence_vma
-  io.ctrl.sfence_vma.src_info := io.memoryStage.inst(0).src_info
-
-  io.fetchUnit.flush := io.ctrl.allow_to_go && (io.csr.out.flush || mou.out.flush)
-  io.fetchUnit.target := Mux(io.csr.out.flush, io.csr.out.target, mou.out.target)
+  io.writeBackStage.data.pc := io.memoryStage.data.pc
+  io.writeBackStage.data.info := io.memoryStage.data.info
+  io.writeBackStage.data.rd_info.wdata := io.memoryStage.data.rd_info.wdata
+  io.writeBackStage.data.rd_info.wdata(FuType.lsu) := rdata
+
+  // This change removes the CSR/MOU logic that used to drive the fetch redirect from
+  // this stage, but the `io.fetchUnit` outputs are still declared, so they are tied
+  // off to "no redirect" instead of being left undriven.
+  io.fetchUnit.flush := false.B
+  io.fetchUnit.target := 0.U
+  // NOTE(review): `io.dataSram` and `io.ctrl` are not referenced in this class either;
+  // confirm whether they also need drivers.
 }