From 70f2a4e0d70609f13c9f35112b90516830c30689 Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Sat, 30 Mar 2019 22:25:06 +0100
Subject: [PATCH] Add SPARC implementation of ?sum as a trivial copy of
 ?asum with the fabs replaced by fmov to preserve code structure
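
The kernels keep the unrolled, software-pipelined structure of the
existing asum.S/zasum.S intact and only change the arithmetic: where
?asum accumulates absolute values, ?sum accumulates the raw signed
elements, and the complex variant feeds both the real and imaginary
part of every element into the accumulators. As in ?asum, a
non-positive increment returns 0. For reference, the intended
semantics in C (a sketch only, not part of this patch; the function
names are illustrative):

    /* dsum: plain signed sum -- dasum would apply fabs() to each term */
    double dsum_ref(long n, const double *x, long incx)
    {
        double s = 0.0;
        for (long i = 0; i < n; i++)
            s += x[i * incx];
        return s;
    }

    /* dzsum: real and imaginary parts of each element are both added */
    double dzsum_ref(long n, const double *x, long incx)
    {
        double s = 0.0;
        for (long i = 0; i < n; i++)   /* incx counts complex elements */
            s += x[2 * i * incx] + x[2 * i * incx + 1];
        return s;
    }
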
---
 kernel/sparc/sum.S  | 325 +++++++++++++++++++++++++++++++++++++++++++
 kernel/sparc/zsum.S | 327 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 652 insertions(+)
 create mode 100644 kernel/sparc/sum.S
 create mode 100644 kernel/sparc/zsum.S

diff --git a/kernel/sparc/sum.S b/kernel/sparc/sum.S
new file mode 100644
index 000000000..f26abb85f
--- /dev/null
+++ b/kernel/sparc/sum.S
@@ -0,0 +1,325 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin.           */
+/* All rights reserved.                                              */
+/*                                                                   */
+/* Redistribution and use in source and binary forms, with or        */
+/* without modification, are permitted provided that the following   */
+/* conditions are met:                                               */
+/*                                                                   */
+/*   1. Redistributions of source code must retain the above         */
+/*      copyright notice, this list of conditions and the following  */
+/*      disclaimer.                                                  */
+/*                                                                   */
+/*   2. Redistributions in binary form must reproduce the above      */
+/*      copyright notice, this list of conditions and the following  */
+/*      disclaimer in the documentation and/or other materials       */
+/*      provided with the distribution.                              */
+/*                                                                   */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT           */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,           */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
+/* DISCLAIMED.  IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT         */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,        */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES          */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE         */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR              */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF        */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT         */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT        */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE               */
+/* POSSIBILITY OF SUCH DAMAGE.                                       */
+/*                                                                   */
+/* The views and conclusions contained in the software and           */
+/* documentation are those of the authors and should not be          */
+/* interpreted as representing official policies, either expressed   */
+/* or implied, of The University of Texas at Austin.                 */
+/*********************************************************************/
+
+#define ASSEMBLER
+#include "common.h"
+
+#define N	%i0
+#define X	%i1
+#define INCX	%i2
+#define I	%i3
+
+#ifdef DOUBLE
+#define c1	%f0
+#define c2	%f2
+#define t1	%f8
+#define t2	%f10
+#define t3	%f12
+#define t4	%f14
+
+#define a1	%f16
+#define a2	%f18
+#define a3	%f20
+#define a4	%f22
+#define a5	%f24
+#define a6	%f26
+#define a7	%f28
+#define a8	%f30
+#else
+#define c1	%f0
+#define c2	%f1
+#define t1	%f4
+#define t2	%f5
+#define t3	%f6
+#define t4	%f7
+
+#define a1	%f8
+#define a2	%f9
+#define a3	%f10
+#define a4	%f11
+#define a5	%f12
+#define a6	%f13
+#define a7	%f14
+#define a8	%f15
+#endif
+
+	PROLOGUE
+	SAVESP
+
+	FCLR(0)
+
+	sll	INCX, BASE_SHIFT, INCX
+
+	FMOV	c1, c2
+	FMOV	c1, t1
+	FMOV	c1, t2
+	FMOV	c1, t3
+	FMOV	c1, t4
+
+	cmp	INCX, 0
+	ble	.LL19
+	cmp	INCX, SIZE
+	bne	.LL50
+
+	sra	N, 3, I
+	cmp	I, 0
+	ble,pn	%icc, .LL15
+	nop
+
+	LDF	[X + 0 * SIZE], a1
+	add	I, -1, I
+	LDF	[X + 1 * SIZE], a2
+	cmp	I, 0
+	LDF	[X + 2 * SIZE], a3
+	LDF	[X + 3 * SIZE], a4
+	LDF	[X + 4 * SIZE], a5
+	LDF	[X + 5 * SIZE], a6
+	LDF	[X + 6 * SIZE], a7
+	LDF	[X + 7 * SIZE], a8
+
+	ble,pt	%icc, .LL12
+	add	X, 8 * SIZE, X
+
+#define PREFETCHSIZE 128
+
+.LL11:
+	FADD	c1, t1, c1
+	prefetch [X + PREFETCHSIZE * SIZE], 0
+	FMOV	a1, t1
+	LDF	[X + 0 * SIZE], a1
+
+	FADD	c2, t2, c2
+	add	I, -1, I
+	FMOV	a2, t2
+	LDF	[X + 1 * SIZE], a2
+
+	FADD	c1, t3, c1
+	cmp	I, 0
+	FMOV	a3, t3
+	LDF	[X + 2 * SIZE], a3
+
+	FADD	c2, t4, c2
+	nop
+	FMOV	a4, t4
+	LDF	[X + 3 * SIZE], a4
+
+	FADD	c1, t1, c1
+	nop
+	FMOV	a5, t1
+	LDF	[X + 4 * SIZE], a5
+
+	FADD	c2, t2, c2
+	nop
+	FMOV	a6, t2
+	LDF	[X + 5 * SIZE], a6
+
+	FADD	c1, t3, c1
+	FMOV	a7, t3
+	LDF	[X + 6 * SIZE], a7
+	add	X, 8 * SIZE, X
+
+	FADD	c2, t4, c2
+	FMOV	a8, t4
+	bg,pt	%icc, .LL11
+	LDF	[X - 1 * SIZE], a8
+
+.LL12:
+	FADD	c1, t1, c1
+	FMOV	a1, t1
+	FADD	c2, t2, c2
+	FMOV	a2, t2
+
+	FADD	c1, t3, c1
+	FMOV	a3, t3
+	FADD	c2, t4, c2
+	FMOV	a4, t4
+
+	FADD	c1, t1, c1
+	FMOV	a5, t1
+	FADD	c2, t2, c2
+	FMOV	a6, t2
+
+	FADD	c1, t3, c1
+	FMOV	a7, t3
+	FADD	c2, t4, c2
+	FMOV	a8, t4
+
+.LL15:
+	and	N, 7, I
+	cmp	I, 0
+	ble,a,pn %icc, .LL19
+	nop
+
+.LL16:
+	LDF	[X + 0 * SIZE], a1
+	add	I, -1, I
+	cmp	I, 0
+	FADD	c1, t1, c1
+	FMOV	a1, t1
+	bg,pt	%icc, .LL16
+	add	X, 1 * SIZE, X
+
+.LL19:
+	FADD	c1, t1, c1
+	FADD	c2, t2, c2
+	FADD	c1, t3, c1
+	FADD	c2, t4, c2
+
+	FADD	c1, c2, c1
+	return	%i7 + 8
+	clr	%g0
+
+.LL50:
+	sra	N, 3, I
+	cmp	I, 0
+	ble,pn	%icc, .LL55
+	nop
+
+	LDF	[X + 0 * SIZE], a1
+	add	X, INCX, X
+	LDF	[X + 0 * SIZE], a2
+	add	X, INCX, X
+	LDF	[X + 0 * SIZE], a3
+	add	X, INCX, X
+	LDF	[X + 0 * SIZE], a4
+	add	X, INCX, X
+	LDF	[X + 0 * SIZE], a5
+	add	X, INCX, X
+	LDF	[X + 0 * SIZE], a6
+	add	X, INCX, X
+	add	I, -1, I
+	LDF	[X + 0 * SIZE], a7
+	cmp	I, 0
+	add	X, INCX, X
+	LDF	[X + 0 * SIZE], a8
+
+	ble,pt	%icc, .LL52
+	add	X, INCX, X
+
+.LL51:
+	FADD	c1, t1, c1
+	add	I, -1, I
+	FMOV	a1, t1
+	LDF	[X + 0 * SIZE], a1
+	add	X, INCX, X
+
+	FADD	c2, t2, c2
+	cmp	I, 0
+	FMOV	a2, t2
+	LDF	[X + 0 * SIZE], a2
+	add	X, INCX, X
+
+	FADD	c1, t3, c1
+	FMOV	a3, t3
+	LDF	[X + 0 * SIZE], a3
+	add	X, INCX, X
+
+	FADD	c2, t4, c2
+	FMOV	a4, t4
+	LDF	[X + 0 * SIZE], a4
+	add	X, INCX, X
+
+	FADD	c1, t1, c1
+	FMOV	a5, t1
+	LDF	[X + 0 * SIZE], a5
+	add	X, INCX, X
+
+	FADD	c2, t2, c2
+	FMOV	a6, t2
+	LDF	[X + 0 * SIZE], a6
+	add	X, INCX, X
+
+	FADD	c1, t3, c1
+	FMOV	a7, t3
+	LDF	[X + 0 * SIZE], a7
+	add	X, INCX, X
+
+	FADD	c2, t4, c2
+	FMOV	a8, t4
+	LDF	[X + 0 * SIZE], a8
+
+	bg,pt	%icc, .LL51
+	add	X, INCX, X
+
+.LL52:
+	FADD	c1, t1, c1
+	FMOV	a1, t1
+	FADD	c2, t2, c2
+	FMOV	a2, t2
+
+	FADD	c1, t3, c1
+	FMOV	a3, t3
+	FADD	c2, t4, c2
+	FMOV	a4, t4
+
+	FADD	c1, t1, c1
+	FMOV	a5, t1
+	FADD	c2, t2, c2
+	FMOV	a6, t2
+
+	FADD	c1, t3, c1
+	FMOV	a7, t3
+	FADD	c2, t4, c2
+	FMOV	a8, t4
+
+.LL55:
+	and	N, 7, I
+	cmp	I, 0
+	ble,a,pn %icc, .LL59
+	nop
+
+.LL56:
+	LDF	[X + 0 * SIZE], a1
+	FADD	c1, t1, c1
+	add	I, -1, I
+	FMOV	a1, t1
+	cmp	I, 0
+	bg,pt	%icc, .LL56
+	add	X, INCX, X
+
+.LL59:
+	FADD	c1, t1, c1
+	FADD	c2, t2, c2
+	FADD	c1, t3, c1
+	FADD	c2, t4, c2
+
+	FADD	c1, c2, c1
+	return	%i7 + 8
+	clr	%o0
+
+	EPILOGUE
diff --git a/kernel/sparc/zsum.S b/kernel/sparc/zsum.S
new file mode 100644
index 000000000..bc167dc72
--- /dev/null
+++ b/kernel/sparc/zsum.S
@@ -0,0 +1,327 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin.           */
+/* All rights reserved.                                              */
+/*                                                                   */
+/* Redistribution and use in source and binary forms, with or        */
+/* without modification, are permitted provided that the following   */
+/* conditions are met:                                               */
+/*                                                                   */
+/*   1. Redistributions of source code must retain the above         */
+/*      copyright notice, this list of conditions and the following  */
+/*      disclaimer.                                                  */
+/*                                                                   */
+/*   2. Redistributions in binary form must reproduce the above      */
+/*      copyright notice, this list of conditions and the following  */
+/*      disclaimer in the documentation and/or other materials       */
+/*      provided with the distribution.                              */
+/*                                                                   */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT           */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,           */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
+/* DISCLAIMED.  IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT         */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,        */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES          */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE         */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR              */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF        */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT         */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT        */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE               */
+/* POSSIBILITY OF SUCH DAMAGE.                                       */
+/*                                                                   */
+/* The views and conclusions contained in the software and           */
+/* documentation are those of the authors and should not be          */
+/* interpreted as representing official policies, either expressed   */
+/* or implied, of The University of Texas at Austin.                 */
+/*********************************************************************/
+
+#define ASSEMBLER
+#include "common.h"
+
+#define N	%i0
+#define X	%i1
+#define INCX	%i2
+#define I	%i3
+
+#ifdef DOUBLE
+#define c1	%f0
+#define c2	%f2
+#define t1	%f8
+#define t2	%f10
+#define t3	%f12
+#define t4	%f14
+
+#define a1	%f16
+#define a2	%f18
+#define a3	%f20
+#define a4	%f22
+#define a5	%f24
+#define a6	%f26
+#define a7	%f28
+#define a8	%f30
+#else
+#define c1	%f0
+#define c2	%f1
+#define t1	%f4
+#define t2	%f5
+#define t3	%f6
+#define t4	%f7
+
+#define a1	%f8
+#define a2	%f9
+#define a3	%f10
+#define a4	%f11
+#define a5	%f12
+#define a6	%f13
+#define a7	%f14
+#define a8	%f15
+#endif
+
+	PROLOGUE
+	SAVESP
+
+	FCLR(0)
+
+	sll	INCX, ZBASE_SHIFT, INCX
+
+	FMOV	c1, c2
+	FMOV	c1, t1
+	FMOV	c1, t2
+	FMOV	c1, t3
+	FMOV	c1, t4
+
+	cmp	INCX, 0
+	ble	.LL19
+	nop
+
+	cmp	INCX, 2 * SIZE
+	bne	.LL50
+	nop
+
+	sra	N, 2, I
+	cmp	I, 0
+	ble,pn	%icc, .LL15
+	nop
+
+	LDF	[X + 0 * SIZE], a1
+	add	I, -1, I
+	LDF	[X + 1 * SIZE], a2
+	cmp	I, 0
+	LDF	[X + 2 * SIZE], a3
+	LDF	[X + 3 * SIZE], a4
+	LDF	[X + 4 * SIZE], a5
+	LDF	[X + 5 * SIZE], a6
+	LDF	[X + 6 * SIZE], a7
+	LDF	[X + 7 * SIZE], a8
+
+	ble,pt	%icc, .LL12
+	add	X, 8 * SIZE, X
+
+#define PREFETCHSIZE 32
+
+.LL11:
+	FADD	c1, t1, c1
+	prefetch [X + PREFETCHSIZE * SIZE], 0
+	FMOV	a1, t1
+	LDF	[X + 0 * SIZE], a1
+
+	FADD	c2, t2, c2
+	add	I, -1, I
+	FMOV	a2, t2
+	LDF	[X + 1 * SIZE], a2
+
+	FADD	c1, t3, c1
+	cmp	I, 0
+	FMOV	a3, t3
+	LDF	[X + 2 * SIZE], a3
+
+	FADD	c2, t4, c2
+	nop
+	FMOV	a4, t4
+	LDF	[X + 3 * SIZE], a4
+
+	FADD	c1, t1, c1
+	nop
+	FMOV	a5, t1
+	LDF	[X + 4 * SIZE], a5
+
+	FADD	c2, t2, c2
+	nop
+	FMOV	a6, t2
+	LDF	[X + 5 * SIZE], a6
+
+	FADD	c1, t3, c1
+	FMOV	a7, t3
+	LDF	[X + 6 * SIZE], a7
+	add	X, 8 * SIZE, X
+
+	FADD	c2, t4, c2
+	FMOV	a8, t4
+	bg,pt	%icc, .LL11
+	LDF	[X - 1 * SIZE], a8
+
+.LL12:
+	FADD	c1, t1, c1
+	FMOV	a1, t1
+	FADD	c2, t2, c2
+	FMOV	a2, t2
+
+	FADD	c1, t3, c1
+	FMOV	a3, t3
+	FADD	c2, t4, c2
+	FMOV	a4, t4
+
+	FADD	c1, t1, c1
+	FMOV	a5, t1
+	FADD	c2, t2, c2
+	FMOV	a6, t2
+
+	FADD	c1, t3, c1
+	FMOV	a7, t3
+	FADD	c2, t4, c2
+	FMOV	a8, t4
+
+.LL15:
+	and	N, 3, I
+	cmp	I, 0
+	ble,a,pn %icc, .LL19
+	nop
+
+.LL16:
+	LDF	[X + 0 * SIZE], a1
+	LDF	[X + 1 * SIZE], a2
+	add	I, -1, I
+	cmp	I, 0
+	FADD	c1, t1, c1
+	FADD	c2, t2, c2
+	FMOV	a1, t1
+	FMOV	a2, t2
+	bg,pt	%icc, .LL16
+	add	X, 2 * SIZE, X
+
+.LL19:
+	FADD	c1, t1, c1
+	FADD	c2, t2, c2
+	FADD	c1, t3, c1
+	FADD	c2, t4, c2
+
+	FADD	c1, c2, c1
+	return	%i7 + 8
+	clr	%g0
+
+.LL50:
+	sra	N, 2, I
+	cmp	I, 0
+	ble,pn	%icc, .LL55
+	nop
+
+	LDF	[X + 0 * SIZE], a1
+	LDF	[X + 1 * SIZE], a2
+	add	X, INCX, X
+	LDF	[X + 0 * SIZE], a3
+	LDF	[X + 1 * SIZE], a4
+	add	X, INCX, X
+	LDF	[X + 0 * SIZE], a5
+	LDF	[X + 1 * SIZE], a6
+	add	X, INCX, X
+	add	I, -1, I
+	LDF	[X + 0 * SIZE], a7
+	cmp	I, 0
+	LDF	[X + 1 * SIZE], a8
+
+	ble,pt	%icc, .LL52
+	add	X, INCX, X
+
+.LL51:
+	FADD	c1, t1, c1
+	add	I, -1, I
+	FMOV	a1, t1
+	LDF	[X + 0 * SIZE], a1
+
+	FADD	c2, t2, c2
+	cmp	I, 0
+	FMOV	a2, t2
+	LDF	[X + 1 * SIZE], a2
+	add	X, INCX, X
+
+	FADD	c1, t3, c1
+	FMOV	a3, t3
+	LDF	[X + 0 * SIZE], a3
+
+	FADD	c2, t4, c2
+	FMOV	a4, t4
+	LDF	[X + 1 * SIZE], a4
+	add	X, INCX, X
+
+	FADD	c1, t1, c1
+	FMOV	a5, t1
+	LDF	[X + 0 * SIZE], a5
+
+	FADD	c2, t2, c2
+	FMOV	a6, t2
+	LDF	[X + 1 * SIZE], a6
+	add	X, INCX, X
+
+	FADD	c1, t3, c1
+	FMOV	a7, t3
+	LDF	[X + 0 * SIZE], a7
+
+	FADD	c2, t4, c2
+	FMOV	a8, t4
+	LDF	[X + 1 * SIZE], a8
+
+	bg,pt	%icc, .LL51
+	add	X, INCX, X
+
+.LL52:
+	FADD	c1, t1, c1
+	FMOV	a1, t1
+	FADD	c2, t2, c2
+	FMOV	a2, t2
+
+	FADD	c1, t3, c1
+	FMOV	a3, t3
+	FADD	c2, t4, c2
+	FMOV	a4, t4
+
+	FADD	c1, t1, c1
+	FMOV	a5, t1
+	FADD	c2, t2, c2
+	FMOV	a6, t2
+
+	FADD	c1, t3, c1
+	FMOV	a7, t3
+	FADD	c2, t4, c2
+	FMOV	a8, t4
+
+.LL55:
+	and	N, 3, I
+	cmp	I, 0
+	ble,a,pn %icc, .LL59
+	nop
+
+.LL56:
+	LDF	[X + 0 * SIZE], a1
+	LDF	[X + 1 * SIZE], a2
+	FADD	c1, t1, c1
+	FADD	c2, t2, c2
+	add	I, -1, I
+	FMOV	a1, t1
+	FMOV	a2, t2
+	cmp	I, 0
+	bg,pt	%icc, .LL56
+	add	X, INCX, X
+
+.LL59:
+	FADD	c1, t1, c1
+	FADD	c2, t2, c2
+	FADD	c1, t3, c1
+	FADD	c2, t4, c2
+
+	FADD	c1, c2, c1
+
+	return	%i7 + 8
+	clr	%o0
+
+	EPILOGUE
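
Note on the loop structure (carried over unchanged from the ?asum
kernels): the contiguous path is unrolled by eight scalar values and
software-pipelined, so each value flows from a load (a1..a8) through a
staging register (t1..t4) into one of two alternating accumulators
(c1, c2) over successive iterations, hiding load and FADD latency; the
accumulators are combined only at the end. zsum.S uses the same
schedule, with four complex (eight scalar) values per iteration. A C
model of that schedule (a sketch under the assumption that incx == 1
and n is a positive multiple of 8; the remainder and strided paths are
omitted, and the names mirror the register defines above):

    double sum8_pipelined(long n, const double *x)
    {
        double c1 = 0.0, c2 = 0.0;                      /* partial sums  */
        double t1 = 0.0, t2 = 0.0, t3 = 0.0, t4 = 0.0;  /* staging slots */
        double a1 = x[0], a2 = x[1], a3 = x[2], a4 = x[3];
        double a5 = x[4], a6 = x[5], a7 = x[6], a8 = x[7];

        x += 8;
        for (long i = n / 8 - 1; i > 0; i--) {
            /* add the group staged last iteration, stage the group
               loaded last iteration, and load the next group (.LL11) */
            c1 += t1; t1 = a1; a1 = x[0];
            c2 += t2; t2 = a2; a2 = x[1];
            c1 += t3; t3 = a3; a3 = x[2];
            c2 += t4; t4 = a4; a4 = x[3];
            c1 += t1; t1 = a5; a5 = x[4];
            c2 += t2; t2 = a6; a6 = x[5];
            c1 += t3; t3 = a7; a7 = x[6];
            c2 += t4; t4 = a8; a8 = x[7];
            x += 8;
        }
        /* drain the pipeline (.LL12) */
        c1 += t1; t1 = a1;  c2 += t2; t2 = a2;
        c1 += t3; t3 = a3;  c2 += t4; t4 = a4;
        c1 += t1; t1 = a5;  c2 += t2; t2 = a6;
        c1 += t3; t3 = a7;  c2 += t4; t4 = a8;
        /* final reduction (.LL19) */
        c1 += t1; c2 += t2; c1 += t3; c2 += t4;
        return c1 + c2;
    }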