修改mul div

This commit is contained in:
Liphen 2023-11-20 15:51:13 +08:00
parent 6d165c916c
commit 2865b6e64c
2 changed files with 340 additions and 340 deletions

View File

@ -1,160 +1,160 @@
// package cpu.pipeline.execute package cpu.pipeline.execute
// import chisel3._ import chisel3._
// import chisel3.util._ import chisel3.util._
// import cpu.defines._ import cpu.defines._
// import cpu.defines.Const._ import cpu.defines.Const._
// import cpu.CpuConfig import cpu.CpuConfig
/** Black-box declaration of the signed divider instantiated by [[Div]] when
  * `config.build` is true.
  *
  * The ports use `s_axis_*` / `m_axis_*` valid/ready naming: one input channel
  * for the divisor, one for the dividend, and one output channel carrying a
  * 64-bit result word.
  *
  * NOTE(review): presumably backed by a vendor divider IP whose output word is
  * {quotient, remainder}; confirm against the IP configuration.
  */
class SignedDiv extends BlackBox with HasBlackBoxResource {
  val io = IO(new Bundle {
    val aclk = Input(Clock())
    // Divisor channel
    val s_axis_divisor_tvalid = Input(Bool())
    val s_axis_divisor_tready = Output(Bool())
    val s_axis_divisor_tdata  = Input(UInt(DATA_WID.W))
    // Dividend channel
    val s_axis_dividend_tvalid = Input(Bool())
    val s_axis_dividend_tready = Output(Bool())
    val s_axis_dividend_tdata  = Input(UInt(DATA_WID.W))
    // Result channel
    val m_axis_dout_tvalid = Output(Bool())
    val m_axis_dout_tdata  = Output(UInt(64.W))
  })
}
/** Black-box declaration of the unsigned divider instantiated by [[Div]] when
  * `config.build` is true.
  *
  * The ports use `s_axis_*` / `m_axis_*` valid/ready naming: one input channel
  * for the divisor, one for the dividend, and one output channel carrying a
  * 64-bit result word.
  *
  * NOTE(review): presumably backed by a vendor divider IP whose output word is
  * {quotient, remainder}; confirm against the IP configuration.
  */
class UnsignedDiv extends BlackBox with HasBlackBoxResource {
  val io = IO(new Bundle {
    val aclk = Input(Clock())
    // Divisor channel
    val s_axis_divisor_tvalid = Input(Bool())
    val s_axis_divisor_tready = Output(Bool())
    val s_axis_divisor_tdata  = Input(UInt(DATA_WID.W))
    // Dividend channel
    val s_axis_dividend_tvalid = Input(Bool())
    val s_axis_dividend_tready = Output(Bool())
    val s_axis_dividend_tdata  = Input(UInt(DATA_WID.W))
    // Result channel
    val m_axis_dout_tvalid = Output(Bool())
    val m_axis_dout_tdata  = Output(UInt(64.W))
  })
}
/** Divider unit with a start/ready handshake.
  *
  * Inputs: `src1` is the dividend, `src2` the divisor, `signed` selects signed
  * division, `start` requests an operation, and `allow_to_go` clears the
  * internal bookkeeping (sent/done flags or the cycle counter).
  *
  * Outputs: `ready` flags that `result` may be consumed; `result` is 64 bits
  * wide with the remainder in the upper half and the quotient in the lower
  * half (see `Cat(remainder, quotient)` in the behavioural branch).
  *
  * Two implementations are selected at elaboration time by `config.build`:
  *  - `true`: drives the [[SignedDiv]] / [[UnsignedDiv]] black boxes.
  *  - `false`: behavioural model that computes with Chisel operators and
  *    raises `ready` once a counter reaches `config.divClockNum`.
  */
class Div(implicit config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val src1        = Input(UInt(DATA_WID.W))
    val src2        = Input(UInt(DATA_WID.W))
    val signed      = Input(Bool())
    val start       = Input(Bool())
    val allow_to_go = Input(Bool())
    val ready       = Output(Bool())
    val result      = Output(UInt(64.W))
  })

  if (config.build) {
    val signedDiv   = Module(new SignedDiv()).io
    val unsignedDiv = Module(new UnsignedDiv()).io

    signedDiv.aclk   := clock
    unsignedDiv.aclk := clock

    // Index 0 tracks the dividend, index 1 the divisor.
    val unsignedDiv_sent = Seq.fill(2)(RegInit(false.B))
    val unsignedDiv_done = RegInit(false.B)
    val signedDiv_sent   = Seq.fill(2)(RegInit(false.B))
    val signedDiv_done   = RegInit(false.B)

    // A "sent" flag is set once the operand has been accepted (valid && ready)
    // and cleared when the finished result is allowed to leave, so each operand
    // is presented to the black box only once per operation.
    when(unsignedDiv.s_axis_dividend_tready && unsignedDiv.s_axis_dividend_tvalid) {
      unsignedDiv_sent(0) := true.B
    }.elsewhen(io.ready && io.allow_to_go) {
      unsignedDiv_sent(0) := false.B
    }
    when(unsignedDiv.s_axis_divisor_tready && unsignedDiv.s_axis_divisor_tvalid) {
      unsignedDiv_sent(1) := true.B
    }.elsewhen(io.ready && io.allow_to_go) {
      unsignedDiv_sent(1) := false.B
    }

    when(signedDiv.s_axis_dividend_tready && signedDiv.s_axis_dividend_tvalid) {
      signedDiv_sent(0) := true.B
    }.elsewhen(io.ready && io.allow_to_go) {
      signedDiv_sent(0) := false.B
    }
    when(signedDiv.s_axis_divisor_tready && signedDiv.s_axis_divisor_tvalid) {
      signedDiv_sent(1) := true.B
    }.elsewhen(io.ready && io.allow_to_go) {
      signedDiv_sent(1) := false.B
    }

    // The "done" flags remember that the output was valid while allow_to_go was
    // low, keeping `ready` asserted until allow_to_go clears them.
    // NOTE(review): only the valid bit is latched; m_axis_dout_tdata is read
    // directly, so this assumes the black box holds its output - confirm.
    when(signedDiv.m_axis_dout_tvalid && !io.allow_to_go) {
      signedDiv_done := true.B
    }.elsewhen(io.allow_to_go) {
      signedDiv_done := false.B
    }
    when(unsignedDiv.m_axis_dout_tvalid && !io.allow_to_go) {
      unsignedDiv_done := true.B
    }.elsewhen(io.allow_to_go) {
      unsignedDiv_done := false.B
    }

    // Valid signals for the dividend and divisor channels.
    signedDiv.s_axis_dividend_tvalid   := io.start && !signedDiv_sent(0) && io.signed
    signedDiv.s_axis_divisor_tvalid    := io.start && !signedDiv_sent(1) && io.signed
    unsignedDiv.s_axis_dividend_tvalid := io.start && !unsignedDiv_sent(0) && !io.signed
    unsignedDiv.s_axis_divisor_tvalid  := io.start && !unsignedDiv_sent(1) && !io.signed

    // Operand values for the dividend and divisor channels.
    signedDiv.s_axis_dividend_tdata   := io.src1
    signedDiv.s_axis_divisor_tdata    := io.src2
    unsignedDiv.s_axis_dividend_tdata := io.src1
    unsignedDiv.s_axis_divisor_tdata  := io.src2

    io.ready := Mux(
      io.signed,
      signedDiv.m_axis_dout_tvalid || signedDiv_done,
      unsignedDiv.m_axis_dout_tvalid || unsignedDiv_done
    )

    // Swap the two halves of the black-box output word so that its low
    // DATA_WID bits end up in the upper half of `result`.
    val signedRes =
      Cat(signedDiv.m_axis_dout_tdata(DATA_WID - 1, 0), signedDiv.m_axis_dout_tdata(64 - 1, DATA_WID))
    val unsignedRes =
      Cat(unsignedDiv.m_axis_dout_tdata(DATA_WID - 1, 0), unsignedDiv.m_axis_dout_tdata(64 - 1, DATA_WID))
    io.result := Mux(io.signed, signedRes, unsignedRes)
  } else {
    // Counts cycles while an operation is pending; cleared by allow_to_go.
    // The increment case has priority over the clear case.
    val cnt = RegInit(0.U(log2Ceil(config.divClockNum + 1).W))
    cnt := MuxCase(
      cnt,
      Seq(
        (io.start && !io.ready) -> (cnt + 1.U),
        io.allow_to_go          -> 0.U
      )
    )

    // NOTE(review): the sign bit index (31) and the 32-bit result registers are
    // hard-coded; this assumes DATA_WID == 32 - confirm.
    val div_signed      = io.signed
    val dividend_signed = io.src1(31) & div_signed
    val divisor_signed  = io.src2(31) & div_signed

    // Divide magnitudes, then re-apply the signs: the quotient is negative when
    // the operand signs differ, the remainder takes the sign of the dividend.
    val dividend_abs = Mux(dividend_signed, (-io.src1).asUInt, io.src1.asUInt)
    val divisor_abs  = Mux(divisor_signed, (-io.src2).asUInt, io.src2.asUInt)

    val quotient_signed  = (io.src1(31) ^ io.src2(31)) & div_signed
    val remainder_signed = io.src1(31) & div_signed

    val quotient_abs  = dividend_abs / divisor_abs
    val remainder_abs = dividend_abs - quotient_abs * divisor_abs

    val quotient  = RegInit(0.S(32.W))
    val remainder = RegInit(0.S(32.W))

    when(io.start) {
      quotient  := Mux(quotient_signed, (-quotient_abs).asSInt, quotient_abs.asSInt)
      remainder := Mux(remainder_signed, (-remainder_abs).asSInt, remainder_abs.asSInt)
    }

    io.ready  := cnt >= config.divClockNum.U
    io.result := Cat(remainder, quotient)
  }
}

View File

@ -1,225 +1,225 @@
// package cpu.pipeline.execute package cpu.pipeline.execute
// import chisel3._ import chisel3._
// import chisel3.util._ import chisel3.util._
// import cpu.defines._ import cpu.defines._
// import cpu.defines.Const._ import cpu.defines.Const._
// import cpu.CpuConfig import cpu.CpuConfig
/** Black-box declaration of the multiplier instantiated by [[Mul]] when
  * `config.build` is true.
  *
  * `A` and `B` are one bit wider than the data path so the caller can pass
  * either a sign-extended or a zero-extended operand; `P` is the product and
  * is two bits wider than 64 to match the widened operands. `CE` is driven
  * from `io.start` by [[Mul]].
  *
  * NOTE(review): presumably backed by a vendor multiplier IP with a clock
  * enable; confirm against the IP configuration.
  */
class SignedMul extends BlackBox with HasBlackBoxResource {
  val io = IO(new Bundle {
    val CLK = Input(Clock())
    val CE  = Input(Bool())
    val A   = Input(UInt((DATA_WID + 1).W))
    val B   = Input(UInt((DATA_WID + 1).W))
    val P   = Output(UInt((64 + 2).W))
  })
}
/** Multiplier unit with a start/ready handshake.
  *
  * `src1` and `src2` are the operands and `signed` selects signed
  * multiplication. While `start` is high and the unit is not yet ready, a
  * cycle counter advances; `ready` is raised once it reaches
  * `config.mulClockNum`. `allow_to_go` resets the counter when it is not
  * counting. `result` is the low 64 bits of the product.
  *
  * `config.build` selects, at elaboration time, between the [[SignedMul]]
  * black box and a behavioural model built from Chisel's `*` operator.
  */
class Mul(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val src1        = Input(UInt(DATA_WID.W))
    val src2        = Input(UInt(DATA_WID.W))
    val signed      = Input(Bool())
    val start       = Input(Bool())
    val allow_to_go = Input(Bool())
    val ready       = Output(Bool())
    val result      = Output(UInt(64.W))
  })

  // Latency counter, common to both implementations. Counting takes priority
  // over the allow_to_go reset; with neither condition the value is held.
  val cnt      = RegInit(0.U(log2Ceil(config.mulClockNum + 1).W))
  val finished = cnt >= config.mulClockNum.U
  when(io.start && !finished) {
    cnt := cnt + 1.U
  }.elsewhen(io.allow_to_go) {
    cnt := 0.U
  }
  io.ready := finished

  if (config.build) {
    val signedMul = Module(new SignedMul()).io
    signedMul.CLK := clock
    signedMul.CE  := io.start

    // Widen each operand by one bit: replicate the MSB for signed operation,
    // prepend a zero otherwise.
    val ext1 = io.signed && io.src1(DATA_WID - 1)
    val ext2 = io.signed && io.src2(DATA_WID - 1)
    signedMul.A := Cat(ext1, io.src1)
    signedMul.B := Cat(ext2, io.src2)

    io.result := signedMul.P(64 - 1, 0)
  } else {
    // Behavioural model: both products are captured whenever start is high,
    // and the signed flag picks which register is exposed.
    val signed   = RegInit(0.U(64.W))
    val unsigned = RegInit(0.U(64.W))
    when(io.start) {
      unsigned := io.src1 * io.src2
      signed   := (io.src1.asSInt * io.src2.asSInt).asUInt
    }
    io.result := Mux(io.signed, signed, unsigned)
  }
}
// class ArrayMulDataModule(len: Int) extends Module {
// val io = IO(new Bundle() {
// val a, b = Input(UInt(len.W))
// val regEnables = Input(Vec(2, Bool()))
// val result = Output(UInt((2 * len).W))
// }) // })
// } // val (a, b) = (io.a, io.b)
// class Mul(implicit val config: CpuConfig) extends Module { // val b_sext, bx2, neg_b, neg_bx2 = Wire(UInt((len + 1).W))
// val io = IO(new Bundle { // b_sext := SignExt(b, len + 1)
// val src1 = Input(UInt(DATA_WID.W)) // bx2 := b_sext << 1
// val src2 = Input(UInt(DATA_WID.W)) // neg_b := (~b_sext).asUInt()
// val signed = Input(Bool()) // neg_bx2 := neg_b << 1
// val start = Input(Bool())
// val allow_to_go = Input(Bool())
// val ready = Output(Bool()) // val columns: Array[Seq[Bool]] = Array.fill(2 * len)(Seq())
// val result = Output(UInt(HILO_WID.W))
// })
// if (config.build) { // var last_x = WireInit(0.U(3.W))
// val signedMul = Module(new SignedMul()).io // for (i <- Range(0, len, 2)) {
// val cnt = RegInit(0.U(log2Ceil(config.mulClockNum + 1).W)) // val x = if (i == 0) Cat(a(1, 0), 0.U(1.W)) else if (i + 1 == len) SignExt(a(i, i - 1), 3) else a(i + 1, i - 1)
// val pp_temp = MuxLookup(
// cnt := MuxCase( // x,
// cnt, // 0.U,
// Seq( // Seq(
// (io.start && !io.ready) -> (cnt + 1.U), // 1.U -> b_sext,
// io.allow_to_go -> 0.U, // 2.U -> b_sext,
// 3.U -> bx2,
// 4.U -> neg_bx2,
// 5.U -> neg_b,
// 6.U -> neg_b,
// ), // ),
// ) // )
// val s = pp_temp(len)
// signedMul.CLK := clock // val t = MuxLookup(
// signedMul.CE := io.start // last_x,
// when(io.signed) { // 0.U(2.W),
// signedMul.A := Cat(io.src1(DATA_WID - 1), io.src1)
// signedMul.B := Cat(io.src2(DATA_WID - 1), io.src2)
// }.otherwise {
// signedMul.A := Cat(0.U(1.W), io.src1)
// signedMul.B := Cat(0.U(1.W), io.src2)
// }
// io.ready := cnt >= config.mulClockNum.U
// io.result := signedMul.P(HILO_WID - 1, 0)
// } else {
// val cnt = RegInit(0.U(log2Ceil(config.mulClockNum + 1).W))
// cnt := MuxCase(
// cnt,
// Seq( // Seq(
// (io.start && !io.ready) -> (cnt + 1.U), // 4.U -> 2.U(2.W),
// io.allow_to_go -> 0.U, // 5.U -> 1.U(2.W),
// 6.U -> 1.U(2.W),
// ), // ),
// ) // )
// last_x = x
// val signed = RegInit(0.U(HILO_WID.W)) // val (pp, weight) = i match {
// val unsigned = RegInit(0.U(HILO_WID.W)) // case 0 =>
// when(io.start) { // (Cat(~s, s, s, pp_temp), 0)
// signed := (io.src1.asSInt * io.src2.asSInt).asUInt // case n if (n == len - 1) || (n == len - 2) =>
// unsigned := io.src1 * io.src2 // (Cat(~s, pp_temp, t), i - 2)
// case _ =>
// (Cat(1.U(1.W), ~s, pp_temp, t), i - 2)
// }
// for (j <- columns.indices) {
// if (j >= weight && j < (weight + pp.getWidth)) {
// columns(j) = columns(j) :+ pp(j - weight)
// }
// } // }
// io.result := Mux(io.signed, signed, unsigned)
// io.ready := cnt >= config.mulClockNum.U
// } // }
// def addOneColumn(col: Seq[Bool], cin: Seq[Bool]): (Seq[Bool], Seq[Bool], Seq[Bool]) = {
// var sum = Seq[Bool]()
// var cout1 = Seq[Bool]()
// var cout2 = Seq[Bool]()
// col.size match {
// case 1 => // do nothing
// sum = col ++ cin
// case 2 =>
// val c22 = Module(new C22)
// c22.io.in := col
// sum = c22.io.out(0).asBool() +: cin
// cout2 = Seq(c22.io.out(1).asBool())
// case 3 =>
// val c32 = Module(new C32)
// c32.io.in := col
// sum = c32.io.out(0).asBool() +: cin
// cout2 = Seq(c32.io.out(1).asBool())
// case 4 =>
// val c53 = Module(new C53)
// for ((x, y) <- c53.io.in.take(4) zip col) {
// x := y
// }
// c53.io.in.last := (if (cin.nonEmpty) cin.head else 0.U)
// sum = Seq(c53.io.out(0).asBool()) ++ (if (cin.nonEmpty) cin.drop(1) else Nil)
// cout1 = Seq(c53.io.out(1).asBool())
// cout2 = Seq(c53.io.out(2).asBool())
// case n =>
// val cin_1 = if (cin.nonEmpty) Seq(cin.head) else Nil
// val cin_2 = if (cin.nonEmpty) cin.drop(1) else Nil
// val (s_1, c_1_1, c_1_2) = addOneColumn(col take 4, cin_1)
// val (s_2, c_2_1, c_2_2) = addOneColumn(col drop 4, cin_2)
// sum = s_1 ++ s_2
// cout1 = c_1_1 ++ c_2_1
// cout2 = c_1_2 ++ c_2_2
// }
// (sum, cout1, cout2)
// }
// def max(in: Iterable[Int]): Int = in.reduce((a, b) => if (a > b) a else b)
// def addAll(cols: Array[Seq[Bool]], depth: Int): (UInt, UInt) = {
// if (max(cols.map(_.size)) <= 2) {
// val sum = Cat(cols.map(_(0)).reverse)
// var k = 0
// while (cols(k).size == 1) k = k + 1
// val carry = Cat(cols.drop(k).map(_(1)).reverse)
// (sum, Cat(carry, 0.U(k.W)))
// } else {
// val columns_next = Array.fill(2 * len)(Seq[Bool]())
// var cout1, cout2 = Seq[Bool]()
// for (i <- cols.indices) {
// val (s, c1, c2) = addOneColumn(cols(i), cout1)
// columns_next(i) = s ++ cout2
// cout1 = c1
// cout2 = c2
// }
// val needReg = depth == 4
// val toNextLayer =
// if (needReg)
// columns_next.map(_.map(x => RegEnable(x, io.regEnables(1))))
// else
// columns_next
// addAll(toNextLayer, depth + 1)
// }
// }
// val columns_reg = columns.map(col => col.map(b => RegEnable(b, io.regEnables(0))))
// val (sum, carry) = addAll(cols = columns_reg, depth = 0)
// io.result := sum + carry
// } // }
// // class ArrayMulDataModule(len: Int) extends Module { // class ArrayMultiplier(len: Int) extends Module {
// // val io = IO(new Bundle() { // override def latency = 2
// // val a, b = Input(UInt(len.W))
// // val regEnables = Input(Vec(2, Bool()))
// // val result = Output(UInt((2 * len).W))
// // })
// // val (a, b) = (io.a, io.b)
// // val b_sext, bx2, neg_b, neg_bx2 = Wire(UInt((len + 1).W)) // val mulDataModule = Module(new ArrayMulDataModule(len))
// // b_sext := SignExt(b, len + 1) // mulDataModule.io.a := io.in.bits.src(0)
// // bx2 := b_sext << 1 // mulDataModule.io.b := io.in.bits.src(1)
// // neg_b := (~b_sext).asUInt() // mulDataModule.io.regEnables := VecInit((1 to latency) map (i => regEnable(i)))
// // neg_bx2 := neg_b << 1 // val result = mulDataModule.io.result
// // val columns: Array[Seq[Bool]] = Array.fill(2 * len)(Seq()) // var ctrlVec = Seq(ctrl)
// for (i <- 1 to latency) {
// ctrlVec = ctrlVec :+ PipelineReg(i)(ctrlVec(i - 1))
// }
// val 32 = len - 1
// val res = Mux(ctrlVec.last.isHi, result(2 * 32 - 1, 32), result(32 - 1, 0))
// // var last_x = WireInit(0.U(3.W)) // io.out.bits.data := Mux(ctrlVec.last.isW, SignExt(res(31, 0), 32), res)
// // for (i <- Range(0, len, 2)) {
// // val x = if (i == 0) Cat(a(1, 0), 0.U(1.W)) else if (i + 1 == len) SignExt(a(i, i - 1), 3) else a(i + 1, i - 1)
// // val pp_temp = MuxLookup(
// // x,
// // 0.U,
// // Seq(
// // 1.U -> b_sext,
// // 2.U -> b_sext,
// // 3.U -> bx2,
// // 4.U -> neg_bx2,
// // 5.U -> neg_b,
// // 6.U -> neg_b,
// // ),
// // )
// // val s = pp_temp(len)
// // val t = MuxLookup(
// // last_x,
// // 0.U(2.W),
// // Seq(
// // 4.U -> 2.U(2.W),
// // 5.U -> 1.U(2.W),
// // 6.U -> 1.U(2.W),
// // ),
// // )
// // last_x = x
// // val (pp, weight) = i match {
// // case 0 =>
// // (Cat(~s, s, s, pp_temp), 0)
// // case n if (n == len - 1) || (n == len - 2) =>
// // (Cat(~s, pp_temp, t), i - 2)
// // case _ =>
// // (Cat(1.U(1.W), ~s, pp_temp, t), i - 2)
// // }
// // for (j <- columns.indices) {
// // if (j >= weight && j < (weight + pp.getWidth)) {
// // columns(j) = columns(j) :+ pp(j - weight)
// // }
// // }
// // }
// // def addOneColumn(col: Seq[Bool], cin: Seq[Bool]): (Seq[Bool], Seq[Bool], Seq[Bool]) = { // XSDebug(p"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n")
// // var sum = Seq[Bool]() // }
// // var cout1 = Seq[Bool]()
// // var cout2 = Seq[Bool]()
// // col.size match {
// // case 1 => // do nothing
// // sum = col ++ cin
// // case 2 =>
// // val c22 = Module(new C22)
// // c22.io.in := col
// // sum = c22.io.out(0).asBool() +: cin
// // cout2 = Seq(c22.io.out(1).asBool())
// // case 3 =>
// // val c32 = Module(new C32)
// // c32.io.in := col
// // sum = c32.io.out(0).asBool() +: cin
// // cout2 = Seq(c32.io.out(1).asBool())
// // case 4 =>
// // val c53 = Module(new C53)
// // for ((x, y) <- c53.io.in.take(4) zip col) {
// // x := y
// // }
// // c53.io.in.last := (if (cin.nonEmpty) cin.head else 0.U)
// // sum = Seq(c53.io.out(0).asBool()) ++ (if (cin.nonEmpty) cin.drop(1) else Nil)
// // cout1 = Seq(c53.io.out(1).asBool())
// // cout2 = Seq(c53.io.out(2).asBool())
// // case n =>
// // val cin_1 = if (cin.nonEmpty) Seq(cin.head) else Nil
// // val cin_2 = if (cin.nonEmpty) cin.drop(1) else Nil
// // val (s_1, c_1_1, c_1_2) = addOneColumn(col take 4, cin_1)
// // val (s_2, c_2_1, c_2_2) = addOneColumn(col drop 4, cin_2)
// // sum = s_1 ++ s_2
// // cout1 = c_1_1 ++ c_2_1
// // cout2 = c_1_2 ++ c_2_2
// // }
// // (sum, cout1, cout2)
// // }
// // def max(in: Iterable[Int]): Int = in.reduce((a, b) => if (a > b) a else b)
// // def addAll(cols: Array[Seq[Bool]], depth: Int): (UInt, UInt) = {
// // if (max(cols.map(_.size)) <= 2) {
// // val sum = Cat(cols.map(_(0)).reverse)
// // var k = 0
// // while (cols(k).size == 1) k = k + 1
// // val carry = Cat(cols.drop(k).map(_(1)).reverse)
// // (sum, Cat(carry, 0.U(k.W)))
// // } else {
// // val columns_next = Array.fill(2 * len)(Seq[Bool]())
// // var cout1, cout2 = Seq[Bool]()
// // for (i <- cols.indices) {
// // val (s, c1, c2) = addOneColumn(cols(i), cout1)
// // columns_next(i) = s ++ cout2
// // cout1 = c1
// // cout2 = c2
// // }
// // val needReg = depth == 4
// // val toNextLayer =
// // if (needReg)
// // columns_next.map(_.map(x => RegEnable(x, io.regEnables(1))))
// // else
// // columns_next
// // addAll(toNextLayer, depth + 1)
// // }
// // }
// // val columns_reg = columns.map(col => col.map(b => RegEnable(b, io.regEnables(0))))
// // val (sum, carry) = addAll(cols = columns_reg, depth = 0)
// // io.result := sum + carry
// // }
// // class ArrayMultiplier(len: Int) extends Module {
// // override def latency = 2
// // val mulDataModule = Module(new ArrayMulDataModule(len))
// // mulDataModule.io.a := io.in.bits.src(0)
// // mulDataModule.io.b := io.in.bits.src(1)
// // mulDataModule.io.regEnables := VecInit((1 to latency) map (i => regEnable(i)))
// // val result = mulDataModule.io.result
// // var ctrlVec = Seq(ctrl)
// // for (i <- 1 to latency) {
// // ctrlVec = ctrlVec :+ PipelineReg(i)(ctrlVec(i - 1))
// // }
// // val 32 = len - 1
// // val res = Mux(ctrlVec.last.isHi, result(2 * 32 - 1, 32), result(32 - 1, 0))
// // io.out.bits.data := Mux(ctrlVec.last.isW, SignExt(res(31, 0), 32), res)
// // XSDebug(p"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n")
// // }