From c3cfc6986b9b2b38af7324591dd4a54c21a093a7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Mar 2019 22:05:11 +0100 Subject: [PATCH] Add implementations of ssum/dsum and csum/zsum as trivial copies of asum/zsasum with the fabs calls replaced by fmov to preserve code structure --- kernel/alpha/sum.S | 206 +++++++++++++++++++++++++++++++++++++++++++ kernel/alpha/zsum.S | 208 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 414 insertions(+) create mode 100644 kernel/alpha/sum.S create mode 100644 kernel/alpha/zsum.S diff --git a/kernel/alpha/sum.S b/kernel/alpha/sum.S new file mode 100644 index 000000000..3902817a7 --- /dev/null +++ b/kernel/alpha/sum.S @@ -0,0 +1,206 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#define ASSEMBLER +#include "common.h" +#include "version.h" + +#define PREFETCHSIZE 88 + +#define N $16 +#define X $17 +#define INCX $18 +#define I $19 + +#define s0 $f0 +#define s1 $f1 +#define s2 $f10 +#define s3 $f11 + +#define a0 $f12 +#define a1 $f13 +#define a2 $f14 +#define a3 $f15 +#define a4 $f16 +#define a5 $f17 +#define a6 $f18 +#define a7 $f19 + +#define t0 $f20 +#define t1 $f21 +#define t2 $f22 +#define t3 $f23 + + PROLOGUE + PROFCODE + + fclr s0 + unop + fclr t0 + ble N, $L999 + + sra N, 3, I + fclr s1 + fclr s2 + ble I, $L15 + + LD a0, 0 * SIZE(X) + fclr t1 + SXADDQ INCX, X, X + fclr t2 + + LD a1, 0 * SIZE(X) + fclr t3 + SXADDQ INCX, X, X + fclr s3 + + LD a2, 0 * SIZE(X) + SXADDQ INCX, X, X + LD a3, 0 * SIZE(X) + SXADDQ INCX, X, X + + LD a4, 0 * SIZE(X) + SXADDQ INCX, X, X + LD a5, 0 * SIZE(X) + SXADDQ INCX, X, X + + lda I, -1(I) + ble I, $L13 + .align 4 + +$L12: + ADD s0, t0, s0 + ldl $31, PREFETCHSIZE * 2 * SIZE(X) + fmov a0, t0 + lda I, -1(I) + + ADD s1, t1, s1 + LD a6, 0 * SIZE(X) + fmov a1, t1 + SXADDQ INCX, X, X + + ADD s2, t2, s2 + LD a7, 0 * SIZE(X) + fmov a2, t2 + SXADDQ INCX, X, X + + ADD s3, t3, s3 + LD a0, 0 * SIZE(X) + fmov a3, t3 + SXADDQ INCX, X, X + + ADD s0, t0, s0 + LD a1, 0 * SIZE(X) + fmov a4, t0 + SXADDQ INCX, X, X + + ADD s1, t1, s1 + LD a2, 0 * SIZE(X) + fmov a5, t1 + SXADDQ INCX, X, X + + ADD s2, t2, s2 + LD a3, 0 * SIZE(X) + fmov a6, t2 + SXADDQ INCX, X, X + + ADD s3, t3, s3 + LD a4, 0 * SIZE(X) + fmov a7, t3 + SXADDQ INCX, X, X + + LD a5, 0 * SIZE(X) + unop + SXADDQ INCX, X, X + bne I, $L12 + .align 4 + +$L13: + ADD s0, t0, s0 + LD a6, 0 * SIZE(X) + fmov a0, t0 + SXADDQ INCX, X, X + + ADD s1, t1, s1 + LD a7, 0 * SIZE(X) + fmov a1, t1 + SXADDQ INCX, X, X + + ADD s2, t2, s2 + fmov a2, t2 + ADD s3, t3, s3 + fmov a3, t3 + + ADD s0, t0, s0 + fmov a4, t0 + ADD s1, t1, s1 + fmov a5, t1 + ADD s2, t2, s2 + fmov a6, t2 + ADD s3, t3, s3 + fmov a7, t3 + + ADD s1, t1, s1 + ADD s2, t2, s2 + ADD s3, t3, s3 + + ADD s0, s1, s0 + ADD s2, s3, s2 + .align 4 + +$L15: + and N, 7, I + ADD s0, s2, s0 + unop + ble I, $L999 + .align 4 + +$L17: + ADD s0, t0, s0 + LD a0, 0 * SIZE(X) + SXADDQ INCX, X, X + fmov a0, t0 + + lda I, -1(I) + bne I, $L17 + .align 4 + +$L999: + ADD s0, t0, s0 + ret + EPILOGUE diff --git a/kernel/alpha/zsum.S b/kernel/alpha/zsum.S new file mode 100644 index 000000000..1ad0eb137 --- /dev/null +++ b/kernel/alpha/zsum.S @@ -0,0 +1,208 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#define ASSEMBLER +#include "common.h" +#include "version.h" + +#define PREFETCHSIZE 88 + +#define N $16 +#define X $17 +#define INCX $18 +#define I $19 + +#define s0 $f0 +#define s1 $f1 +#define s2 $f10 +#define s3 $f11 + +#define a0 $f12 +#define a1 $f13 +#define a2 $f14 +#define a3 $f15 +#define a4 $f16 +#define a5 $f17 +#define a6 $f18 +#define a7 $f19 + +#define t0 $f20 +#define t1 $f21 +#define t2 $f22 +#define t3 $f23 + + PROLOGUE + PROFCODE + + fclr s0 + unop + fclr t0 + addq INCX, INCX, INCX + + fclr s1 + unop + fclr t1 + ble N, $L999 + + fclr s2 + sra N, 2, I + fclr s3 + ble I, $L15 + + LD a0, 0 * SIZE(X) + fclr t2 + LD a1, 1 * SIZE(X) + SXADDQ INCX, X, X + + LD a2, 0 * SIZE(X) + fclr t3 + LD a3, 1 * SIZE(X) + SXADDQ INCX, X, X + + LD a4, 0 * SIZE(X) + LD a5, 1 * SIZE(X) + SXADDQ INCX, X, X + lda I, -1(I) + + ble I, $L13 + .align 4 + +$L12: + ADD s0, t0, s0 + ldl $31, PREFETCHSIZE * SIZE(X) + fmov a0, t0 + lda I, -1(I) + + ADD s1, t1, s1 + LD a6, 0 * SIZE(X) + fmov a1, t1 + unop + + ADD s2, t2, s2 + LD a7, 1 * SIZE(X) + fmov a2, t2 + SXADDQ INCX, X, X + + ADD s3, t3, s3 + LD a0, 0 * SIZE(X) + fmov a3, t3 + unop + + ADD s0, t0, s0 + LD a1, 1 * SIZE(X) + fmov a4, t0 + SXADDQ INCX, X, X + + ADD s1, t1, s1 + LD a2, 0 * SIZE(X) + fmov a5, t1 + unop + + ADD s2, t2, s2 + LD a3, 1 * SIZE(X) + fmov a6, t2 + SXADDQ INCX, X, X + + ADD s3, t3, s3 + LD a4, 0 * SIZE(X) + fmov a7, t3 + unop + + LD a5, 1 * SIZE(X) + unop + SXADDQ INCX, X, X + bne I, $L12 + .align 4 + +$L13: + ADD s0, t0, s0 + LD a6, 0 * SIZE(X) + fmov a0, t0 + + ADD s1, t1, s1 + LD a7, 1 * SIZE(X) + fmov a1, t1 + SXADDQ INCX, X, X + + ADD s2, t2, s2 + fmov a2, t2 + ADD s3, t3, s3 + fmov a3, t3 + + ADD s0, t0, s0 + fmov a4, t0 + ADD s1, t1, s1 + fmov a5, t1 + ADD s2, t2, s2 + fmov a6, t2 + ADD s3, t3, s3 + fmov a7, t3 + + ADD s2, t2, s2 + ADD s3, t3, s3 + + .align 4 + +$L15: + ADD s0, s2, s0 + and N, 3, I + ADD s1, s3, s1 + ble I, $L999 + .align 4 + +$L17: + ADD s0, t0, s0 + LD a0, 0 * SIZE(X) + fmov a0, t0 + lda I, -1(I) + + ADD s1, t1, s1 + LD a1, 1 * SIZE(X) + fmov a1, t1 + SXADDQ INCX, X, X + + bne I, $L17 + .align 4 + +$L999: + ADD s0, t0, s0 + ADD s1, t1, s1 + + ADD s0, s1, s0 + ret + EPILOGUE