diff --git a/kernel/x86/sum.S b/kernel/x86/sum.S new file mode 100644 index 000000000..b24f34c8b --- /dev/null +++ b/kernel/x86/sum.S @@ -0,0 +1,207 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#define ASSEMBLER +#include "common.h" + +#define STACK 8 +#define ARGS 0 + +#define STACK_M 4 + STACK + ARGS(%esp) +#define STACK_X 8 + STACK + ARGS(%esp) +#define STACK_INCX 12 + STACK + ARGS(%esp) + +#define M %edx +#define X %ecx +#define INCX %esi + +#define I %eax + +#include "l1param.h" + + PROLOGUE + + pushl %esi + pushl %ebx + + PROFCODE + +#if defined(F_INTERFACE_GFORT) || defined(F_INTERFACE_G95) + EMMS +#endif + + movl STACK_M, M + movl STACK_X, X + movl STACK_INCX, INCX + +#ifdef F_INTERFACE + movl (M), M + movl (INCX), INCX +#endif + + fldz + testl M, M + jle .L999 + testl INCX, INCX + jle .L999 + + sall $BASE_SHIFT, INCX + fldz + fldz + fldz + cmpl $SIZE, INCX + jne .L40 + + movl M, I + sarl $3, I + jle .L20 + ALIGN_4 + +.L10: +#ifdef PREFETCH + PREFETCH (PREFETCHSIZE + 0) - PREOFFSET(X) +#endif + + FLD 0 * SIZE(X) + FLD 1 * SIZE(X) + FLD 2 * SIZE(X) + FLD 3 * SIZE(X) + + faddp %st, %st(7) + faddp %st, %st(5) + faddp %st, %st(3) + faddp %st, %st(1) + + FLD 4 * SIZE(X) + FLD 5 * SIZE(X) + FLD 6 * SIZE(X) + FLD 7 * SIZE(X) + + addl $8 * SIZE, X + + faddp %st, %st(7) + faddp %st, %st(5) + faddp %st, %st(3) + faddp %st, %st(1) + + decl I + jg .L10 + ALIGN_4 + +.L20: + movl M, I + andl $7, I + jle .L998 + ALIGN_4 + + +.L21: + FLD (X) + faddp %st,%st(1) + addl $1 * SIZE, X + decl I + jg .L21 + jmp .L998 + ALIGN_4 + +.L40: + movl M, I + sarl $3, I + jle .L60 + ALIGN_4 + +.L50: + FLD (X) + addl INCX, X + FLD (X) + addl INCX, X + FLD (X) + addl INCX, X + FLD (X) + addl INCX, X + + faddp %st, %st(7) + faddp %st, %st(5) + faddp %st, %st(3) + faddp %st, %st(1) + + FLD (X) + addl INCX, X + FLD (X) + addl INCX, X + FLD (X) + addl INCX, X + FLD (X) + addl INCX, X + + faddp %st, %st(7) + faddp %st, %st(5) + faddp %st, %st(3) + faddp %st, %st(1) + + decl I + jg .L50 + ALIGN_4 + +.L60: + movl M, I + andl $7, I + jle .L998 + ALIGN_4 + + +.L61: + FLD (X) + addl INCX, X + faddp %st,%st(1) + decl I + jg .L61 + ALIGN_4 + +.L998: + faddp %st,%st(2) + faddp %st,%st(1) + faddp %st,%st(1) + ALIGN_4 + +.L999: + popl %ebx + popl %esi + ret + + EPILOGUE diff --git a/kernel/x86/zsum.S b/kernel/x86/zsum.S new file mode 100644 index 000000000..cd2ce61db --- /dev/null +++ b/kernel/x86/zsum.S @@ -0,0 +1,208 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#define ASSEMBLER +#include "common.h" + +#define STACK 8 +#define ARGS 0 + +#define STACK_M 4 + STACK + ARGS(%esp) +#define STACK_X 8 + STACK + ARGS(%esp) +#define STACK_INCX 12 + STACK + ARGS(%esp) + +#define M %edx +#define X %ecx +#define INCX %esi + +#define I %eax + +#include "l1param.h" + + PROLOGUE + + pushl %esi + pushl %ebx + + PROFCODE + +#if defined(F_INTERFACE_GFORT) || defined(F_INTERFACE_G95) + EMMS +#endif + + movl STACK_M, M + movl STACK_X, X + movl STACK_INCX, INCX + +#ifdef F_INTERFACE + movl (M), M + movl (INCX), INCX +#endif + + fldz + testl M, M + jle .L999 + testl INCX, INCX + jle .L999 + + sall $ZBASE_SHIFT, INCX + + fldz + fldz + fldz + cmpl $SIZE * 2, INCX + jne .L40 + + movl M, I + sarl $2, I + jle .L20 + ALIGN_4 + +.L10: +#ifdef PREFETCH + PREFETCH (PREFETCHSIZE + 0) - PREOFFSET(X) +#endif + + FLD 0 * SIZE(X) + FLD 1 * SIZE(X) + FLD 2 * SIZE(X) + FLD 3 * SIZE(X) + + faddp %st, %st(7) + faddp %st, %st(5) + faddp %st, %st(3) + faddp %st, %st(1) + + FLD 4 * SIZE(X) + FLD 5 * SIZE(X) + FLD 6 * SIZE(X) + FLD 7 * SIZE(X) + + addl $8 * SIZE, X + + faddp %st, %st(7) + faddp %st, %st(5) + faddp %st, %st(3) + faddp %st, %st(1) + + decl I + jg .L10 + ALIGN_4 + +.L20: + movl M, I + andl $3, I + jle .L998 + ALIGN_4 + + +.L21: + FLD 0 * SIZE(X) + FLD 1 * SIZE(X) + faddp %st,%st(3) + faddp %st,%st(1) + addl $2 * SIZE, X + decl I + jg .L21 + jmp .L998 + ALIGN_4 + +.L40: + movl M, I + sarl $2, I + jle .L60 + ALIGN_4 + +.L50: + FLD 0 * SIZE(X) + FLD 1 * SIZE(X) + addl INCX, X + FLD 0 * SIZE(X) + FLD 1 * SIZE(X) + addl INCX, X + + faddp %st, %st(7) + faddp %st, %st(5) + faddp %st, %st(3) + faddp %st, %st(1) + + FLD 0 * SIZE(X) + FLD 1 * SIZE(X) + addl INCX, X + FLD 0 * SIZE(X) + FLD 1 * SIZE(X) + addl INCX, X + + faddp %st, %st(7) + faddp %st, %st(5) + faddp %st, %st(3) + faddp %st, %st(1) + + decl I + jg .L50 + ALIGN_4 + +.L60: + movl M, I + andl $3, I + jle .L998 + ALIGN_4 + + +.L61: + FLD 0 * SIZE(X) + FLD 1 * SIZE(X) + addl INCX, X + faddp %st,%st(3) + faddp %st,%st(1) + decl I + jg .L61 + ALIGN_4 + +.L998: + faddp %st,%st(2) + faddp %st,%st(1) + faddp %st,%st(1) + ALIGN_4 + +.L999: + popl %ebx + popl %esi + ret + + EPILOGUE