From 2865b6e64cb9fed6e77ec546db9e1f11db9b6cad Mon Sep 17 00:00:00 2001 From: Liphen Date: Mon, 20 Nov 2023 15:51:13 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9mul=20div?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../playground/src/pipeline/execute/Div.scala | 272 ++++++------ .../playground/src/pipeline/execute/Mul.scala | 408 +++++++++--------- 2 files changed, 340 insertions(+), 340 deletions(-) diff --git a/chisel/playground/src/pipeline/execute/Div.scala b/chisel/playground/src/pipeline/execute/Div.scala index 53ff677..8623443 100644 --- a/chisel/playground/src/pipeline/execute/Div.scala +++ b/chisel/playground/src/pipeline/execute/Div.scala @@ -1,160 +1,160 @@ -// package cpu.pipeline.execute +package cpu.pipeline.execute -// import chisel3._ -// import chisel3.util._ -// import cpu.defines._ -// import cpu.defines.Const._ -// import cpu.CpuConfig +import chisel3._ +import chisel3.util._ +import cpu.defines._ +import cpu.defines.Const._ +import cpu.CpuConfig -// class SignedDiv extends BlackBox with HasBlackBoxResource { -// val io = IO(new Bundle { -// val aclk = Input(Clock()) -// // 除数 -// val s_axis_divisor_tvalid = Input(Bool()) -// val s_axis_divisor_tready = Output(Bool()) -// val s_axis_divisor_tdata = Input(UInt(DATA_WID.W)) -// // 被除数 -// val s_axis_dividend_tvalid = Input(Bool()) -// val s_axis_dividend_tready = Output(Bool()) -// val s_axis_dividend_tdata = Input(UInt(DATA_WID.W)) -// // 结果 -// val m_axis_dout_tvalid = Output(Bool()) -// val m_axis_dout_tdata = Output(UInt(HILO_WID.W)) -// }) -// } +class SignedDiv extends BlackBox with HasBlackBoxResource { + val io = IO(new Bundle { + val aclk = Input(Clock()) + // 除数 + val s_axis_divisor_tvalid = Input(Bool()) + val s_axis_divisor_tready = Output(Bool()) + val s_axis_divisor_tdata = Input(UInt(DATA_WID.W)) + // 被除数 + val s_axis_dividend_tvalid = Input(Bool()) + val s_axis_dividend_tready = Output(Bool()) + val s_axis_dividend_tdata = Input(UInt(DATA_WID.W)) + // 结果 + val m_axis_dout_tvalid = Output(Bool()) + val m_axis_dout_tdata = Output(UInt(64.W)) + }) +} -// class UnsignedDiv extends BlackBox with HasBlackBoxResource { -// val io = IO(new Bundle { -// val aclk = Input(Clock()) -// // 除数 -// val s_axis_divisor_tvalid = Input(Bool()) -// val s_axis_divisor_tready = Output(Bool()) -// val s_axis_divisor_tdata = Input(UInt(DATA_WID.W)) -// // 被除数 -// val s_axis_dividend_tvalid = Input(Bool()) -// val s_axis_dividend_tready = Output(Bool()) -// val s_axis_dividend_tdata = Input(UInt(DATA_WID.W)) -// // 结果 -// val m_axis_dout_tvalid = Output(Bool()) -// val m_axis_dout_tdata = Output(UInt(HILO_WID.W)) -// }) -// } +class UnsignedDiv extends BlackBox with HasBlackBoxResource { + val io = IO(new Bundle { + val aclk = Input(Clock()) + // 除数 + val s_axis_divisor_tvalid = Input(Bool()) + val s_axis_divisor_tready = Output(Bool()) + val s_axis_divisor_tdata = Input(UInt(DATA_WID.W)) + // 被除数 + val s_axis_dividend_tvalid = Input(Bool()) + val s_axis_dividend_tready = Output(Bool()) + val s_axis_dividend_tdata = Input(UInt(DATA_WID.W)) + // 结果 + val m_axis_dout_tvalid = Output(Bool()) + val m_axis_dout_tdata = Output(UInt(64.W)) + }) +} -// class Div(implicit config: CpuConfig) extends Module { -// val io = IO(new Bundle { -// val src1 = Input(UInt(DATA_WID.W)) -// val src2 = Input(UInt(DATA_WID.W)) -// val signed = Input(Bool()) -// val start = Input(Bool()) -// val allow_to_go = Input(Bool()) +class Div(implicit config: CpuConfig) extends Module { + val io = IO(new Bundle { + val src1 = Input(UInt(DATA_WID.W)) + val src2 = Input(UInt(DATA_WID.W)) + val signed = Input(Bool()) + val start = Input(Bool()) + val allow_to_go = Input(Bool()) -// val ready = Output(Bool()) -// val result = Output(UInt(HILO_WID.W)) -// }) + val ready = Output(Bool()) + val result = Output(UInt(64.W)) + }) -// if (config.build) { -// val signedDiv = Module(new SignedDiv()).io -// val unsignedDiv = Module(new UnsignedDiv()).io + if (config.build) { + val signedDiv = Module(new SignedDiv()).io + val unsignedDiv = Module(new UnsignedDiv()).io -// signedDiv.aclk := clock -// unsignedDiv.aclk := clock + signedDiv.aclk := clock + unsignedDiv.aclk := clock -// // 0为被除数,1为除数 -// val unsignedDiv_sent = Seq.fill(2)(RegInit(false.B)) -// val unsignedDiv_done = RegInit(false.B) -// val signedDiv_sent = Seq.fill(2)(RegInit(false.B)) -// val signedDiv_done = RegInit(false.B) + // 0为被除数,1为除数 + val unsignedDiv_sent = Seq.fill(2)(RegInit(false.B)) + val unsignedDiv_done = RegInit(false.B) + val signedDiv_sent = Seq.fill(2)(RegInit(false.B)) + val signedDiv_done = RegInit(false.B) -// when(unsignedDiv.s_axis_dividend_tready && unsignedDiv.s_axis_dividend_tvalid) { -// unsignedDiv_sent(0) := true.B -// }.elsewhen(io.ready && io.allow_to_go) { -// unsignedDiv_sent(0) := false.B -// } -// when(unsignedDiv.s_axis_divisor_tready && unsignedDiv.s_axis_divisor_tvalid) { -// unsignedDiv_sent(1) := true.B -// }.elsewhen(io.ready && io.allow_to_go) { -// unsignedDiv_sent(1) := false.B -// } + when(unsignedDiv.s_axis_dividend_tready && unsignedDiv.s_axis_dividend_tvalid) { + unsignedDiv_sent(0) := true.B + }.elsewhen(io.ready && io.allow_to_go) { + unsignedDiv_sent(0) := false.B + } + when(unsignedDiv.s_axis_divisor_tready && unsignedDiv.s_axis_divisor_tvalid) { + unsignedDiv_sent(1) := true.B + }.elsewhen(io.ready && io.allow_to_go) { + unsignedDiv_sent(1) := false.B + } -// when(signedDiv.s_axis_dividend_tready && signedDiv.s_axis_dividend_tvalid) { -// signedDiv_sent(0) := true.B -// }.elsewhen(io.ready && io.allow_to_go) { -// signedDiv_sent(0) := false.B -// } -// when(signedDiv.s_axis_divisor_tready && signedDiv.s_axis_divisor_tvalid) { -// signedDiv_sent(1) := true.B -// }.elsewhen(io.ready && io.allow_to_go) { -// signedDiv_sent(1) := false.B -// } + when(signedDiv.s_axis_dividend_tready && signedDiv.s_axis_dividend_tvalid) { + signedDiv_sent(0) := true.B + }.elsewhen(io.ready && io.allow_to_go) { + signedDiv_sent(0) := false.B + } + when(signedDiv.s_axis_divisor_tready && signedDiv.s_axis_divisor_tvalid) { + signedDiv_sent(1) := true.B + }.elsewhen(io.ready && io.allow_to_go) { + signedDiv_sent(1) := false.B + } -// when(signedDiv.m_axis_dout_tvalid && !io.allow_to_go) { -// signedDiv_done := true.B -// }.elsewhen(io.allow_to_go) { -// signedDiv_done := false.B -// } + when(signedDiv.m_axis_dout_tvalid && !io.allow_to_go) { + signedDiv_done := true.B + }.elsewhen(io.allow_to_go) { + signedDiv_done := false.B + } -// when(unsignedDiv.m_axis_dout_tvalid && !io.allow_to_go) { -// unsignedDiv_done := true.B -// }.elsewhen(io.allow_to_go) { -// unsignedDiv_done := false.B -// } -// // 被除数和除数的valid信号 -// signedDiv.s_axis_dividend_tvalid := io.start && !signedDiv_sent(0) && io.signed -// signedDiv.s_axis_divisor_tvalid := io.start && !signedDiv_sent(1) && io.signed + when(unsignedDiv.m_axis_dout_tvalid && !io.allow_to_go) { + unsignedDiv_done := true.B + }.elsewhen(io.allow_to_go) { + unsignedDiv_done := false.B + } + // 被除数和除数的valid信号 + signedDiv.s_axis_dividend_tvalid := io.start && !signedDiv_sent(0) && io.signed + signedDiv.s_axis_divisor_tvalid := io.start && !signedDiv_sent(1) && io.signed -// unsignedDiv.s_axis_dividend_tvalid := io.start && !unsignedDiv_sent(0) && !io.signed -// unsignedDiv.s_axis_divisor_tvalid := io.start && !unsignedDiv_sent(1) && !io.signed + unsignedDiv.s_axis_dividend_tvalid := io.start && !unsignedDiv_sent(0) && !io.signed + unsignedDiv.s_axis_divisor_tvalid := io.start && !unsignedDiv_sent(1) && !io.signed -// // 被除数和除数的值 -// signedDiv.s_axis_dividend_tdata := io.src1 -// signedDiv.s_axis_divisor_tdata := io.src2 + // 被除数和除数的值 + signedDiv.s_axis_dividend_tdata := io.src1 + signedDiv.s_axis_divisor_tdata := io.src2 -// unsignedDiv.s_axis_dividend_tdata := io.src1 -// unsignedDiv.s_axis_divisor_tdata := io.src2 + unsignedDiv.s_axis_dividend_tdata := io.src1 + unsignedDiv.s_axis_divisor_tdata := io.src2 -// io.ready := Mux( -// io.signed, -// signedDiv.m_axis_dout_tvalid || signedDiv_done, -// unsignedDiv.m_axis_dout_tvalid || unsignedDiv_done, -// ) -// val signedRes = -// Cat(signedDiv.m_axis_dout_tdata(DATA_WID - 1, 0), signedDiv.m_axis_dout_tdata(HILO_WID - 1, DATA_WID)) -// val unsignedRes = -// Cat(unsignedDiv.m_axis_dout_tdata(DATA_WID - 1, 0), unsignedDiv.m_axis_dout_tdata(HILO_WID - 1, DATA_WID)) -// io.result := Mux(io.signed, signedRes, unsignedRes) -// } else { -// val cnt = RegInit(0.U(log2Ceil(config.divClockNum + 1).W)) -// cnt := MuxCase( -// cnt, -// Seq( -// (io.start && !io.ready) -> (cnt + 1.U), -// io.allow_to_go -> 0.U, -// ), -// ) + io.ready := Mux( + io.signed, + signedDiv.m_axis_dout_tvalid || signedDiv_done, + unsignedDiv.m_axis_dout_tvalid || unsignedDiv_done + ) + val signedRes = + Cat(signedDiv.m_axis_dout_tdata(DATA_WID - 1, 0), signedDiv.m_axis_dout_tdata(64 - 1, DATA_WID)) + val unsignedRes = + Cat(unsignedDiv.m_axis_dout_tdata(DATA_WID - 1, 0), unsignedDiv.m_axis_dout_tdata(64 - 1, DATA_WID)) + io.result := Mux(io.signed, signedRes, unsignedRes) + } else { + val cnt = RegInit(0.U(log2Ceil(config.divClockNum + 1).W)) + cnt := MuxCase( + cnt, + Seq( + (io.start && !io.ready) -> (cnt + 1.U), + io.allow_to_go -> 0.U + ) + ) -// val div_signed = io.signed + val div_signed = io.signed -// val dividend_signed = io.src1(31) & div_signed -// val divisor_signed = io.src2(31) & div_signed + val dividend_signed = io.src1(31) & div_signed + val divisor_signed = io.src2(31) & div_signed -// val dividend_abs = Mux(dividend_signed, (-io.src1).asUInt, io.src1.asUInt) -// val divisor_abs = Mux(divisor_signed, (-io.src2).asUInt, io.src2.asUInt) + val dividend_abs = Mux(dividend_signed, (-io.src1).asUInt, io.src1.asUInt) + val divisor_abs = Mux(divisor_signed, (-io.src2).asUInt, io.src2.asUInt) -// val quotient_signed = (io.src1(31) ^ io.src2(31)) & div_signed -// val remainder_signed = io.src1(31) & div_signed + val quotient_signed = (io.src1(31) ^ io.src2(31)) & div_signed + val remainder_signed = io.src1(31) & div_signed -// val quotient_abs = dividend_abs / divisor_abs -// val remainder_abs = dividend_abs - quotient_abs * divisor_abs + val quotient_abs = dividend_abs / divisor_abs + val remainder_abs = dividend_abs - quotient_abs * divisor_abs -// val quotient = RegInit(0.S(32.W)) -// val remainder = RegInit(0.S(32.W)) + val quotient = RegInit(0.S(32.W)) + val remainder = RegInit(0.S(32.W)) -// when(io.start) { -// quotient := Mux(quotient_signed, (-quotient_abs).asSInt, quotient_abs.asSInt) -// remainder := Mux(remainder_signed, (-remainder_abs).asSInt, remainder_abs.asSInt) -// } + when(io.start) { + quotient := Mux(quotient_signed, (-quotient_abs).asSInt, quotient_abs.asSInt) + remainder := Mux(remainder_signed, (-remainder_abs).asSInt, remainder_abs.asSInt) + } -// io.ready := cnt >= config.divClockNum.U -// io.result := Cat(remainder, quotient) -// } -// } + io.ready := cnt >= config.divClockNum.U + io.result := Cat(remainder, quotient) + } +} diff --git a/chisel/playground/src/pipeline/execute/Mul.scala b/chisel/playground/src/pipeline/execute/Mul.scala index 991e3da..f80183b 100644 --- a/chisel/playground/src/pipeline/execute/Mul.scala +++ b/chisel/playground/src/pipeline/execute/Mul.scala @@ -1,225 +1,225 @@ -// package cpu.pipeline.execute +package cpu.pipeline.execute -// import chisel3._ -// import chisel3.util._ -// import cpu.defines._ -// import cpu.defines.Const._ -// import cpu.CpuConfig +import chisel3._ +import chisel3.util._ +import cpu.defines._ +import cpu.defines.Const._ +import cpu.CpuConfig -// class SignedMul extends BlackBox with HasBlackBoxResource { -// val io = IO(new Bundle { -// val CLK = Input(Clock()) -// val CE = Input(Bool()) -// val A = Input(UInt((DATA_WID + 1).W)) -// val B = Input(UInt((DATA_WID + 1).W)) +class SignedMul extends BlackBox with HasBlackBoxResource { + val io = IO(new Bundle { + val CLK = Input(Clock()) + val CE = Input(Bool()) + val A = Input(UInt((DATA_WID + 1).W)) + val B = Input(UInt((DATA_WID + 1).W)) -// val P = Output(UInt((HILO_WID + 2).W)) + val P = Output(UInt((64 + 2).W)) + }) +} + +class Mul(implicit val config: CpuConfig) extends Module { + val io = IO(new Bundle { + val src1 = Input(UInt(DATA_WID.W)) + val src2 = Input(UInt(DATA_WID.W)) + val signed = Input(Bool()) + val start = Input(Bool()) + val allow_to_go = Input(Bool()) + + val ready = Output(Bool()) + val result = Output(UInt(64.W)) + }) + + if (config.build) { + val signedMul = Module(new SignedMul()).io + val cnt = RegInit(0.U(log2Ceil(config.mulClockNum + 1).W)) + + cnt := MuxCase( + cnt, + Seq( + (io.start && !io.ready) -> (cnt + 1.U), + io.allow_to_go -> 0.U + ) + ) + + signedMul.CLK := clock + signedMul.CE := io.start + when(io.signed) { + signedMul.A := Cat(io.src1(DATA_WID - 1), io.src1) + signedMul.B := Cat(io.src2(DATA_WID - 1), io.src2) + }.otherwise { + signedMul.A := Cat(0.U(1.W), io.src1) + signedMul.B := Cat(0.U(1.W), io.src2) + } + io.ready := cnt >= config.mulClockNum.U + io.result := signedMul.P(64 - 1, 0) + } else { + val cnt = RegInit(0.U(log2Ceil(config.mulClockNum + 1).W)) + cnt := MuxCase( + cnt, + Seq( + (io.start && !io.ready) -> (cnt + 1.U), + io.allow_to_go -> 0.U + ) + ) + + val signed = RegInit(0.U(64.W)) + val unsigned = RegInit(0.U(64.W)) + when(io.start) { + signed := (io.src1.asSInt * io.src2.asSInt).asUInt + unsigned := io.src1 * io.src2 + } + io.result := Mux(io.signed, signed, unsigned) + io.ready := cnt >= config.mulClockNum.U + } +} + +// class ArrayMulDataModule(len: Int) extends Module { +// val io = IO(new Bundle() { +// val a, b = Input(UInt(len.W)) +// val regEnables = Input(Vec(2, Bool())) +// val result = Output(UInt((2 * len).W)) // }) -// } +// val (a, b) = (io.a, io.b) -// class Mul(implicit val config: CpuConfig) extends Module { -// val io = IO(new Bundle { -// val src1 = Input(UInt(DATA_WID.W)) -// val src2 = Input(UInt(DATA_WID.W)) -// val signed = Input(Bool()) -// val start = Input(Bool()) -// val allow_to_go = Input(Bool()) +// val b_sext, bx2, neg_b, neg_bx2 = Wire(UInt((len + 1).W)) +// b_sext := SignExt(b, len + 1) +// bx2 := b_sext << 1 +// neg_b := (~b_sext).asUInt() +// neg_bx2 := neg_b << 1 -// val ready = Output(Bool()) -// val result = Output(UInt(HILO_WID.W)) -// }) +// val columns: Array[Seq[Bool]] = Array.fill(2 * len)(Seq()) -// if (config.build) { -// val signedMul = Module(new SignedMul()).io -// val cnt = RegInit(0.U(log2Ceil(config.mulClockNum + 1).W)) - -// cnt := MuxCase( -// cnt, +// var last_x = WireInit(0.U(3.W)) +// for (i <- Range(0, len, 2)) { +// val x = if (i == 0) Cat(a(1, 0), 0.U(1.W)) else if (i + 1 == len) SignExt(a(i, i - 1), 3) else a(i + 1, i - 1) +// val pp_temp = MuxLookup( +// x, +// 0.U, // Seq( -// (io.start && !io.ready) -> (cnt + 1.U), -// io.allow_to_go -> 0.U, +// 1.U -> b_sext, +// 2.U -> b_sext, +// 3.U -> bx2, +// 4.U -> neg_bx2, +// 5.U -> neg_b, +// 6.U -> neg_b, // ), // ) - -// signedMul.CLK := clock -// signedMul.CE := io.start -// when(io.signed) { -// signedMul.A := Cat(io.src1(DATA_WID - 1), io.src1) -// signedMul.B := Cat(io.src2(DATA_WID - 1), io.src2) -// }.otherwise { -// signedMul.A := Cat(0.U(1.W), io.src1) -// signedMul.B := Cat(0.U(1.W), io.src2) -// } -// io.ready := cnt >= config.mulClockNum.U -// io.result := signedMul.P(HILO_WID - 1, 0) -// } else { -// val cnt = RegInit(0.U(log2Ceil(config.mulClockNum + 1).W)) -// cnt := MuxCase( -// cnt, +// val s = pp_temp(len) +// val t = MuxLookup( +// last_x, +// 0.U(2.W), // Seq( -// (io.start && !io.ready) -> (cnt + 1.U), -// io.allow_to_go -> 0.U, +// 4.U -> 2.U(2.W), +// 5.U -> 1.U(2.W), +// 6.U -> 1.U(2.W), // ), // ) - -// val signed = RegInit(0.U(HILO_WID.W)) -// val unsigned = RegInit(0.U(HILO_WID.W)) -// when(io.start) { -// signed := (io.src1.asSInt * io.src2.asSInt).asUInt -// unsigned := io.src1 * io.src2 +// last_x = x +// val (pp, weight) = i match { +// case 0 => +// (Cat(~s, s, s, pp_temp), 0) +// case n if (n == len - 1) || (n == len - 2) => +// (Cat(~s, pp_temp, t), i - 2) +// case _ => +// (Cat(1.U(1.W), ~s, pp_temp, t), i - 2) +// } +// for (j <- columns.indices) { +// if (j >= weight && j < (weight + pp.getWidth)) { +// columns(j) = columns(j) :+ pp(j - weight) +// } // } -// io.result := Mux(io.signed, signed, unsigned) -// io.ready := cnt >= config.mulClockNum.U // } + +// def addOneColumn(col: Seq[Bool], cin: Seq[Bool]): (Seq[Bool], Seq[Bool], Seq[Bool]) = { +// var sum = Seq[Bool]() +// var cout1 = Seq[Bool]() +// var cout2 = Seq[Bool]() +// col.size match { +// case 1 => // do nothing +// sum = col ++ cin +// case 2 => +// val c22 = Module(new C22) +// c22.io.in := col +// sum = c22.io.out(0).asBool() +: cin +// cout2 = Seq(c22.io.out(1).asBool()) +// case 3 => +// val c32 = Module(new C32) +// c32.io.in := col +// sum = c32.io.out(0).asBool() +: cin +// cout2 = Seq(c32.io.out(1).asBool()) +// case 4 => +// val c53 = Module(new C53) +// for ((x, y) <- c53.io.in.take(4) zip col) { +// x := y +// } +// c53.io.in.last := (if (cin.nonEmpty) cin.head else 0.U) +// sum = Seq(c53.io.out(0).asBool()) ++ (if (cin.nonEmpty) cin.drop(1) else Nil) +// cout1 = Seq(c53.io.out(1).asBool()) +// cout2 = Seq(c53.io.out(2).asBool()) +// case n => +// val cin_1 = if (cin.nonEmpty) Seq(cin.head) else Nil +// val cin_2 = if (cin.nonEmpty) cin.drop(1) else Nil +// val (s_1, c_1_1, c_1_2) = addOneColumn(col take 4, cin_1) +// val (s_2, c_2_1, c_2_2) = addOneColumn(col drop 4, cin_2) +// sum = s_1 ++ s_2 +// cout1 = c_1_1 ++ c_2_1 +// cout2 = c_1_2 ++ c_2_2 +// } +// (sum, cout1, cout2) +// } + +// def max(in: Iterable[Int]): Int = in.reduce((a, b) => if (a > b) a else b) +// def addAll(cols: Array[Seq[Bool]], depth: Int): (UInt, UInt) = { +// if (max(cols.map(_.size)) <= 2) { +// val sum = Cat(cols.map(_(0)).reverse) +// var k = 0 +// while (cols(k).size == 1) k = k + 1 +// val carry = Cat(cols.drop(k).map(_(1)).reverse) +// (sum, Cat(carry, 0.U(k.W))) +// } else { +// val columns_next = Array.fill(2 * len)(Seq[Bool]()) +// var cout1, cout2 = Seq[Bool]() +// for (i <- cols.indices) { +// val (s, c1, c2) = addOneColumn(cols(i), cout1) +// columns_next(i) = s ++ cout2 +// cout1 = c1 +// cout2 = c2 +// } + +// val needReg = depth == 4 +// val toNextLayer = +// if (needReg) +// columns_next.map(_.map(x => RegEnable(x, io.regEnables(1)))) +// else +// columns_next + +// addAll(toNextLayer, depth + 1) +// } +// } + +// val columns_reg = columns.map(col => col.map(b => RegEnable(b, io.regEnables(0)))) +// val (sum, carry) = addAll(cols = columns_reg, depth = 0) + +// io.result := sum + carry // } -// // class ArrayMulDataModule(len: Int) extends Module { -// // val io = IO(new Bundle() { -// // val a, b = Input(UInt(len.W)) -// // val regEnables = Input(Vec(2, Bool())) -// // val result = Output(UInt((2 * len).W)) -// // }) -// // val (a, b) = (io.a, io.b) +// class ArrayMultiplier(len: Int) extends Module { +// override def latency = 2 -// // val b_sext, bx2, neg_b, neg_bx2 = Wire(UInt((len + 1).W)) -// // b_sext := SignExt(b, len + 1) -// // bx2 := b_sext << 1 -// // neg_b := (~b_sext).asUInt() -// // neg_bx2 := neg_b << 1 +// val mulDataModule = Module(new ArrayMulDataModule(len)) +// mulDataModule.io.a := io.in.bits.src(0) +// mulDataModule.io.b := io.in.bits.src(1) +// mulDataModule.io.regEnables := VecInit((1 to latency) map (i => regEnable(i))) +// val result = mulDataModule.io.result -// // val columns: Array[Seq[Bool]] = Array.fill(2 * len)(Seq()) +// var ctrlVec = Seq(ctrl) +// for (i <- 1 to latency) { +// ctrlVec = ctrlVec :+ PipelineReg(i)(ctrlVec(i - 1)) +// } +// val 32 = len - 1 +// val res = Mux(ctrlVec.last.isHi, result(2 * 32 - 1, 32), result(32 - 1, 0)) -// // var last_x = WireInit(0.U(3.W)) -// // for (i <- Range(0, len, 2)) { -// // val x = if (i == 0) Cat(a(1, 0), 0.U(1.W)) else if (i + 1 == len) SignExt(a(i, i - 1), 3) else a(i + 1, i - 1) -// // val pp_temp = MuxLookup( -// // x, -// // 0.U, -// // Seq( -// // 1.U -> b_sext, -// // 2.U -> b_sext, -// // 3.U -> bx2, -// // 4.U -> neg_bx2, -// // 5.U -> neg_b, -// // 6.U -> neg_b, -// // ), -// // ) -// // val s = pp_temp(len) -// // val t = MuxLookup( -// // last_x, -// // 0.U(2.W), -// // Seq( -// // 4.U -> 2.U(2.W), -// // 5.U -> 1.U(2.W), -// // 6.U -> 1.U(2.W), -// // ), -// // ) -// // last_x = x -// // val (pp, weight) = i match { -// // case 0 => -// // (Cat(~s, s, s, pp_temp), 0) -// // case n if (n == len - 1) || (n == len - 2) => -// // (Cat(~s, pp_temp, t), i - 2) -// // case _ => -// // (Cat(1.U(1.W), ~s, pp_temp, t), i - 2) -// // } -// // for (j <- columns.indices) { -// // if (j >= weight && j < (weight + pp.getWidth)) { -// // columns(j) = columns(j) :+ pp(j - weight) -// // } -// // } -// // } +// io.out.bits.data := Mux(ctrlVec.last.isW, SignExt(res(31, 0), 32), res) -// // def addOneColumn(col: Seq[Bool], cin: Seq[Bool]): (Seq[Bool], Seq[Bool], Seq[Bool]) = { -// // var sum = Seq[Bool]() -// // var cout1 = Seq[Bool]() -// // var cout2 = Seq[Bool]() -// // col.size match { -// // case 1 => // do nothing -// // sum = col ++ cin -// // case 2 => -// // val c22 = Module(new C22) -// // c22.io.in := col -// // sum = c22.io.out(0).asBool() +: cin -// // cout2 = Seq(c22.io.out(1).asBool()) -// // case 3 => -// // val c32 = Module(new C32) -// // c32.io.in := col -// // sum = c32.io.out(0).asBool() +: cin -// // cout2 = Seq(c32.io.out(1).asBool()) -// // case 4 => -// // val c53 = Module(new C53) -// // for ((x, y) <- c53.io.in.take(4) zip col) { -// // x := y -// // } -// // c53.io.in.last := (if (cin.nonEmpty) cin.head else 0.U) -// // sum = Seq(c53.io.out(0).asBool()) ++ (if (cin.nonEmpty) cin.drop(1) else Nil) -// // cout1 = Seq(c53.io.out(1).asBool()) -// // cout2 = Seq(c53.io.out(2).asBool()) -// // case n => -// // val cin_1 = if (cin.nonEmpty) Seq(cin.head) else Nil -// // val cin_2 = if (cin.nonEmpty) cin.drop(1) else Nil -// // val (s_1, c_1_1, c_1_2) = addOneColumn(col take 4, cin_1) -// // val (s_2, c_2_1, c_2_2) = addOneColumn(col drop 4, cin_2) -// // sum = s_1 ++ s_2 -// // cout1 = c_1_1 ++ c_2_1 -// // cout2 = c_1_2 ++ c_2_2 -// // } -// // (sum, cout1, cout2) -// // } - -// // def max(in: Iterable[Int]): Int = in.reduce((a, b) => if (a > b) a else b) -// // def addAll(cols: Array[Seq[Bool]], depth: Int): (UInt, UInt) = { -// // if (max(cols.map(_.size)) <= 2) { -// // val sum = Cat(cols.map(_(0)).reverse) -// // var k = 0 -// // while (cols(k).size == 1) k = k + 1 -// // val carry = Cat(cols.drop(k).map(_(1)).reverse) -// // (sum, Cat(carry, 0.U(k.W))) -// // } else { -// // val columns_next = Array.fill(2 * len)(Seq[Bool]()) -// // var cout1, cout2 = Seq[Bool]() -// // for (i <- cols.indices) { -// // val (s, c1, c2) = addOneColumn(cols(i), cout1) -// // columns_next(i) = s ++ cout2 -// // cout1 = c1 -// // cout2 = c2 -// // } - -// // val needReg = depth == 4 -// // val toNextLayer = -// // if (needReg) -// // columns_next.map(_.map(x => RegEnable(x, io.regEnables(1)))) -// // else -// // columns_next - -// // addAll(toNextLayer, depth + 1) -// // } -// // } - -// // val columns_reg = columns.map(col => col.map(b => RegEnable(b, io.regEnables(0)))) -// // val (sum, carry) = addAll(cols = columns_reg, depth = 0) - -// // io.result := sum + carry -// // } - -// // class ArrayMultiplier(len: Int) extends Module { -// // override def latency = 2 - -// // val mulDataModule = Module(new ArrayMulDataModule(len)) -// // mulDataModule.io.a := io.in.bits.src(0) -// // mulDataModule.io.b := io.in.bits.src(1) -// // mulDataModule.io.regEnables := VecInit((1 to latency) map (i => regEnable(i))) -// // val result = mulDataModule.io.result - -// // var ctrlVec = Seq(ctrl) -// // for (i <- 1 to latency) { -// // ctrlVec = ctrlVec :+ PipelineReg(i)(ctrlVec(i - 1)) -// // } -// // val 32 = len - 1 -// // val res = Mux(ctrlVec.last.isHi, result(2 * 32 - 1, 32), result(32 - 1, 0)) - -// // io.out.bits.data := Mux(ctrlVec.last.isW, SignExt(res(31, 0), 32), res) - -// // XSDebug(p"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n") -// // } +// XSDebug(p"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n") +// }