Import GotoBLAS2 1.13 BSD version codes.

This commit is contained in:
Xianyi Zhang
2011-01-24 14:54:24 +00:00
commit 342bbc3871
1685 changed files with 1382682 additions and 0 deletions

124
kernel/alpha/KERNEL Normal file
View File

@@ -0,0 +1,124 @@
# Default Level-1 kernel sources for this directory. Every entry is wrapped
# in ifndef, so it is assigned only when the variable does not already have
# a non-empty value; a definition made before this file is read wins.

# Absolute-value minimum kernels reuse the amax sources. amax.S swaps the
# operands of its compare when USE_MIN is defined; presumably the build
# defines USE_MIN for these variants (confirm in the kernel Makefile).
ifndef SAMINKERNEL
SAMINKERNEL = amax.S
endif
ifndef DAMINKERNEL
DAMINKERNEL = amax.S
endif
ifndef CAMINKERNEL
CAMINKERNEL = zamax.S
endif
ifndef ZAMINKERNEL
ZAMINKERNEL = zamax.S
endif
# Signed minimum kernels reuse the max source.
ifndef SMINKERNEL
SMINKERNEL = max.S
endif
ifndef DMINKERNEL
DMINKERNEL = max.S
endif
# Index-of-minimum kernels reuse the index-of-maximum sources.
ifndef ISAMINKERNEL
ISAMINKERNEL = iamax.S
endif
ifndef IDAMINKERNEL
IDAMINKERNEL = iamax.S
endif
ifndef ICAMINKERNEL
ICAMINKERNEL = izamax.S
endif
ifndef IZAMINKERNEL
IZAMINKERNEL = izamax.S
endif
# NOTE(review): the two entries below name iamax.S (absolute value), whereas
# SMINKERNEL/DMINKERNEL above name max.S (no absolute value). Confirm that
# iamax.S really serves the signed index-min case, or whether a signed
# index source was intended.
ifndef ISMINKERNEL
ISMINKERNEL = iamax.S
endif
ifndef IDMINKERNEL
IDMINKERNEL = iamax.S
endif
# Complex copy uses the common copy.S, which has COMPLEX-conditional code.
ifndef CCOPYKERNEL
CCOPYKERNEL = copy.S
endif
ifndef ZCOPYKERNEL
ZCOPYKERNEL = copy.S
endif
# Euclidean norm: one source file per precision.
ifndef SNRM2KERNEL
SNRM2KERNEL = snrm2.S
endif
ifndef DNRM2KERNEL
DNRM2KERNEL = dnrm2.S
endif
ifndef CNRM2KERNEL
CNRM2KERNEL = cnrm2.S
endif
ifndef ZNRM2KERNEL
ZNRM2KERNEL = znrm2.S
endif
# GEMM configuration (unconditional assignments). For each precision:
#   xGEMMKERNEL                 compute kernel source
#   xGEMM_BETA                  beta kernel source
#   xGEMMONCOPY / xGEMMOTCOPY   generic C copy sources
#   xGEMM*COPYOBJ               object names for the two copy sources
# S and D share the real 4x4 sources; C and Z share the complex 2x2 sources.
# Each xGEMM_BETA is assigned exactly once, inside its own group.

# Single precision real
SGEMMKERNEL = gemm_kernel_4x4.S
SGEMM_BETA = gemm_beta.S
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMONCOPYOBJ = sgemm_oncopy.$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy.$(SUFFIX)
# Double precision real
DGEMMKERNEL = gemm_kernel_4x4.S
DGEMM_BETA = gemm_beta.S
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMONCOPYOBJ = dgemm_oncopy.$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy.$(SUFFIX)
# Single precision complex
CGEMMKERNEL = zgemm_kernel_2x2.S
CGEMM_BETA = zgemm_beta.S
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy.$(SUFFIX)
# Double precision complex
ZGEMMKERNEL = zgemm_kernel_2x2.S
ZGEMM_BETA = zgemm_beta.S
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy.$(SUFFIX)
# TRSM kernel sources, one variable per precision and per LN/LT/RN/RT case.
# S and D share the real 4x4 files; C and Z share the complex 2x2 files.
# NOTE(review): every *_RN entry names the same *_LT.S file as the matching
# *_LT entry. This is presumably deliberate (no *_RN.S file is referenced
# anywhere in this fragment) - confirm against the directory contents.
STRSMKERNEL_LN = trsm_kernel_4x4_LN.S
STRSMKERNEL_LT = trsm_kernel_4x4_LT.S
STRSMKERNEL_RN = trsm_kernel_4x4_LT.S
STRSMKERNEL_RT = trsm_kernel_4x4_RT.S
DTRSMKERNEL_LN = trsm_kernel_4x4_LN.S
DTRSMKERNEL_LT = trsm_kernel_4x4_LT.S
DTRSMKERNEL_RN = trsm_kernel_4x4_LT.S
DTRSMKERNEL_RT = trsm_kernel_4x4_RT.S
CTRSMKERNEL_LN = ztrsm_kernel_2x2_LN.S
CTRSMKERNEL_LT = ztrsm_kernel_2x2_LT.S
CTRSMKERNEL_RN = ztrsm_kernel_2x2_LT.S
CTRSMKERNEL_RT = ztrsm_kernel_2x2_RT.S
ZTRSMKERNEL_LN = ztrsm_kernel_2x2_LN.S
ZTRSMKERNEL_LT = ztrsm_kernel_2x2_LT.S
ZTRSMKERNEL_RN = ztrsm_kernel_2x2_LT.S
ZTRSMKERNEL_RT = ztrsm_kernel_2x2_RT.S

2
kernel/alpha/Makefile Normal file
View File

@@ -0,0 +1,2 @@
# Double-colon "clean" rule with no prerequisites and no recipe: it only
# declares the target, so "make clean" in this directory removes nothing.
clean ::

283
kernel/alpha/amax.S Normal file
View File

@@ -0,0 +1,283 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
/* Absolute-value extreme of a strided vector (Alpha).
   Scans N elements starting at X, stepping by INCX, and leaves the largest
   (or, with USE_MIN, the smallest) absolute value in $f0. If N <= 0 or
   INCX <= 0 the routine returns with $f0 cleared.
   PROLOGUE, PROFCODE, EPILOGUE, LD, SIZE and SXADDQ come from common.h,
   which is not part of this listing. */
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Integer argument registers as used by the code below. */
#define N $16
#define X $17
#define INCX $18
/* CMPLT(a, b) expands to "cmptlt a, b", or to "cmptlt b, a" when USE_MIN is
   defined. The destination register is written after the macro at each use,
   e.g. "CMPLT($f0, $f29), $f16". */
#ifndef USE_MIN
#define CMPLT(a, b) cmptlt a, b
#else
#define CMPLT(a, b) cmptlt b, a
#endif
/* Six 8-byte stack slots; five of them hold the saved $f2-$f6. */
#define STACKSIZE 6 * 8
PROLOGUE
PROFCODE
.frame $sp, STACKSIZE, $26, 0
lda $sp, -STACKSIZE($sp)
nop
.align 4
/* Interleaved setup:
     - save $f2-$f6 (they are used as running candidates later);
     - clear the compare flags $f16-$f19 and the result register $f0;
     - $2 = (0 < N), $3 = (0 < INCX), $0 = $2 & $3, $1 = N >> 3.
   INCX is tested before SXADDQ overwrites it. SXADDQ INCX, $31, INCX adds
   zero to a scaled INCX; presumably this converts the stride from elements
   to bytes - confirm against common.h. */
stt $f2, 0($sp)
fclr $f16
cmplt $31, N, $2
unop
stt $f3, 8($sp)
fclr $f17
cmplt $31, INCX, $3
unop
stt $f4, 16($sp)
fclr $f18
SXADDQ INCX, $31, INCX
unop
stt $f5, 24($sp)
fclr $f19
and $2, $3, $0
unop
stt $f6, 32($sp)
fclr $f0
sra N, 3, $1
beq $0, $End # if (n <= 0) or (incx <= 0) return
.align 4
/* Seed the result with |x[0]|. X is not advanced here, so when there are
   fewer than eight elements the tail loop at $L15 starts again at x[0]. */
LD $f20, 0 * SIZE(X)
unop
fabs $f20, $f0
ble $1, $L15
.align 4
/* At least one group of eight: seed the other seven running candidates
   ($f1-$f6 and $f28) with |x[0]| as well, and load x[1]..x[7] into
   $f21-$f27, advancing X after each element. $1 is decremented once for
   the group that is now in registers. */
fabs $f20, $f1
unop
addq X, INCX, X
unop
LD $f21, 0 * SIZE(X)
fabs $f20, $f2
addq X, INCX, X
unop
LD $f22, 0 * SIZE(X)
fabs $f20, $f3
addq X, INCX, X
unop
LD $f23, 0 * SIZE(X)
fabs $f20, $f4
addq X, INCX, X
unop
LD $f24, 0 * SIZE(X)
addq X, INCX, X
fabs $f20, $f5
unop
LD $f25, 0 * SIZE(X)
fabs $f20, $f6
addq X, INCX, X
unop
LD $f26, 0 * SIZE(X)
fabs $f20, $f28
addq X, INCX, X
lda $1, -1($1)
LD $f27, 0 * SIZE(X)
unop
addq X, INCX, X
ble $1, $L13
.align 4
/* Main loop: eight elements per pass, software-pipelined.
   - The four fcmovne at the top of the pass complete the $f4/$f5/$f6/$f28
     updates whose compares were issued at the bottom of the previous pass.
     On the first pass the flags $f16-$f19 are still zero from the prologue,
     so nothing is moved.
   - Each of $f20-$f27 is consumed by fabs and then reloaded with an element
     of the next group.
   - Absolute values go to $f29, $f30, $f10-$f15; each is compared with its
     own running candidate and conditionally replaces it.
   - "ldl $31, ..." loads into register $31, so the value is discarded;
     presumably a prefetch hint. */
$L12:
fcmovne $f16, $f12, $f4
unop
fabs $f20, $f29
ldl $31, 56 * SIZE(X)
fcmovne $f17, $f13, $f5
LD $f20, 0 * SIZE(X)
fabs $f21, $f30
addq X, INCX, X
fcmovne $f18, $f14, $f6
LD $f21, 0 * SIZE(X)
fabs $f22, $f10
addq X, INCX, X
fcmovne $f19, $f15, $f28
LD $f22, 0 * SIZE(X)
fabs $f23, $f11
addq X, INCX, X
fabs $f24, $f12
LD $f23, 0 * SIZE(X)
CMPLT($f0, $f29), $f16
addq X, INCX, X
fabs $f25, $f13
LD $f24, 0 * SIZE(X)
CMPLT($f1, $f30), $f17
addq X, INCX, X
fabs $f26, $f14
LD $f25, 0 * SIZE(X)
CMPLT($f2, $f10), $f18
addq X, INCX, X
fabs $f27, $f15
LD $f26, 0 * SIZE(X)
CMPLT($f3, $f11), $f19
addq X, INCX, X
fcmovne $f16, $f29, $f0
LD $f27, 0 * SIZE(X)
CMPLT($f4, $f12), $f16
addq X, INCX, X
fcmovne $f17, $f30, $f1
unop
CMPLT($f5, $f13), $f17
lda $1, -1($1) # i --
fcmovne $f18, $f10, $f2
unop
CMPLT($f6, $f14), $f18
unop
fcmovne $f19, $f11, $f3
unop
CMPLT($f28, $f15), $f19
bgt $1,$L12
.align 4
/* Drain: the same compare/update sequence for the eight elements already
   held in $f20-$f27, with no further loads. */
$L13:
fcmovne $f16, $f12, $f4
fabs $f20, $f29
fcmovne $f17, $f13, $f5
fabs $f21, $f30
fcmovne $f18, $f14, $f6
fabs $f22, $f10
fcmovne $f19, $f15, $f28
fabs $f23, $f11
fabs $f24, $f12
CMPLT($f0, $f29), $f16
fabs $f25, $f13
CMPLT($f1, $f30), $f17
fabs $f26, $f14
CMPLT($f2, $f10), $f18
fabs $f27, $f15
CMPLT($f3, $f11), $f19
fcmovne $f16, $f29, $f0
CMPLT($f4, $f12), $f16
fcmovne $f17, $f30, $f1
CMPLT($f5, $f13), $f17
fcmovne $f18, $f10, $f2
CMPLT($f6, $f14), $f18
fcmovne $f19, $f11, $f3
CMPLT($f28, $f15), $f19
/* Finish the last four pending updates, then reduce the eight candidates
   pairwise: (f0,f1) (f2,f3) (f4,f5) (f6,f28) -> (f0,f2) (f4,f6) -> f0. */
fcmovne $f16, $f12, $f4
CMPLT($f0, $f1), $f16
fcmovne $f17, $f13, $f5
CMPLT($f2, $f3), $f17
fcmovne $f18, $f14, $f6
CMPLT($f4, $f5), $f18
fcmovne $f19, $f15, $f28
CMPLT($f6, $f28), $f19
fcmovne $f16, $f1, $f0
fcmovne $f17, $f3, $f2
fcmovne $f18, $f5, $f4
fcmovne $f19, $f28, $f6
CMPLT($f0, $f2), $f16
CMPLT($f4, $f6), $f17
fcmovne $f16, $f2, $f0
fcmovne $f17, $f6, $f4
CMPLT($f0, $f4), $f16
fcmovne $f16, $f4, $f0
.align 4
/* Tail: $1 = N & 7 elements left, handled one at a time against $f0. */
$L15:
and N, 7, $1
unop
unop
ble $1, $End
.align 4
$L16:
LD $f20, 0 * SIZE(X)
addq X, INCX, X
fabs $f20, $f29
CMPLT($f0, $f29), $f16
fcmovne $f16, $f29, $f0
lda $1, -1($1) # i --
bgt $1, $L16
.align 4
/* Common exit: restore $f2-$f6 from the frame, release it and return.
   The result is whatever is in $f0 at this point. */
$End:
ldt $f2, 0($sp)
ldt $f3, 8($sp)
ldt $f4, 16($sp)
ldt $f5, 24($sp)
ldt $f6, 32($sp)
lda $sp, STACKSIZE($sp)
ret
EPILOGUE

206
kernel/alpha/asum.S Normal file
View File

@@ -0,0 +1,206 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
/* Sum of absolute values of a strided vector (Alpha).
   Adds |x[i]| for N elements starting at X and leaves the total in s0
   ($f0). For N <= 0 the code branches straight to $L999 with s0 and t0
   cleared, so the result is zero. INCX is not range-checked here.
   PROLOGUE, PROFCODE, EPILOGUE, LD, ADD, SIZE and SXADDQ come from common.h,
   which is not part of this listing. */
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Multiplied by 2 * SIZE where it is used in the main loop. */
#define PREFETCHSIZE 88
/* Integer registers: arguments and loop counter. */
#define N $16
#define X $17
#define INCX $18
#define I $19
/* s0-s3: four independent partial sums. */
#define s0 $f0
#define s1 $f1
#define s2 $f10
#define s3 $f11
/* a0-a7: the eight elements of the current group. */
#define a0 $f12
#define a1 $f13
#define a2 $f14
#define a3 $f15
#define a4 $f16
#define a5 $f17
#define a6 $f18
#define a7 $f19
/* t0-t3: absolute values waiting to be added to s0-s3. */
#define t0 $f20
#define t1 $f21
#define t2 $f22
#define t3 $f23
PROLOGUE
PROFCODE
fclr s0
unop
fclr t0
ble N, $L999
/* I = number of full groups of eight. s2 is cleared before the branch
   because $L15 adds it to s0 on every path. */
sra N, 3, I
fclr s1
fclr s2
ble I, $L15
/* Preload a0-a5 of the first group; each "SXADDQ INCX, X, X" advances X by
   one scaled stride. The remaining accumulators are cleared in between. */
LD a0, 0 * SIZE(X)
fclr t1
SXADDQ INCX, X, X
fclr t2
LD a1, 0 * SIZE(X)
fclr t3
SXADDQ INCX, X, X
fclr s3
LD a2, 0 * SIZE(X)
SXADDQ INCX, X, X
LD a3, 0 * SIZE(X)
SXADDQ INCX, X, X
LD a4, 0 * SIZE(X)
SXADDQ INCX, X, X
LD a5, 0 * SIZE(X)
SXADDQ INCX, X, X
lda I, -1(I)
ble I, $L13
.align 4
/* Main loop: eight elements per pass. Every ADD folds in the absolute
   value produced one step earlier, so t0-t3 always carry pending terms.
   The pass loads a6/a7 of the current group and a0-a5 of the next one.
   "ldl $31, ..." loads into register $31, so the value is discarded;
   presumably a prefetch hint. */
$L12:
ADD s0, t0, s0
ldl $31, PREFETCHSIZE * 2 * SIZE(X)
fabs a0, t0
lda I, -1(I)
ADD s1, t1, s1
LD a6, 0 * SIZE(X)
fabs a1, t1
SXADDQ INCX, X, X
ADD s2, t2, s2
LD a7, 0 * SIZE(X)
fabs a2, t2
SXADDQ INCX, X, X
ADD s3, t3, s3
LD a0, 0 * SIZE(X)
fabs a3, t3
SXADDQ INCX, X, X
ADD s0, t0, s0
LD a1, 0 * SIZE(X)
fabs a4, t0
SXADDQ INCX, X, X
ADD s1, t1, s1
LD a2, 0 * SIZE(X)
fabs a5, t1
SXADDQ INCX, X, X
ADD s2, t2, s2
LD a3, 0 * SIZE(X)
fabs a6, t2
SXADDQ INCX, X, X
ADD s3, t3, s3
LD a4, 0 * SIZE(X)
fabs a7, t3
SXADDQ INCX, X, X
LD a5, 0 * SIZE(X)
unop
SXADDQ INCX, X, X
bne I, $L12
.align 4
/* Drain the last full group: load its a6/a7, fold everything in, and merge
   s1 into s0 and s3 into s2. On exit t0 still holds |a4|; it is added by
   the first ADD at $L17 or by the ADD at $L999. */
$L13:
ADD s0, t0, s0
LD a6, 0 * SIZE(X)
fabs a0, t0
SXADDQ INCX, X, X
ADD s1, t1, s1
LD a7, 0 * SIZE(X)
fabs a1, t1
SXADDQ INCX, X, X
ADD s2, t2, s2
fabs a2, t2
ADD s3, t3, s3
fabs a3, t3
ADD s0, t0, s0
fabs a4, t0
ADD s1, t1, s1
fabs a5, t1
ADD s2, t2, s2
fabs a6, t2
ADD s3, t3, s3
fabs a7, t3
ADD s1, t1, s1
ADD s2, t2, s2
ADD s3, t3, s3
ADD s0, s1, s0
ADD s2, s3, s2
.align 4
/* Tail: merge s2 into s0, then handle the N & 7 leftover elements one at a
   time. Each pass first adds the pending t0, then produces a new one. */
$L15:
and N, 7, I
ADD s0, s2, s0
unop
ble I, $L999
.align 4
$L17:
ADD s0, t0, s0
LD a0, 0 * SIZE(X)
SXADDQ INCX, X, X
fabs a0, t0
lda I, -1(I)
bne I, $L17
.align 4
/* Fold in the last pending term; the total is left in s0 ($f0). */
$L999:
ADD s0, t0, s0
ret
EPILOGUE

428
kernel/alpha/axpy.S Normal file
View File

@@ -0,0 +1,428 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
/* Scaled vector update (Alpha): for each element, the value loaded through
   $24 is replaced by itself plus $f30 times the value loaded through $20.
   Register use, as established by the code below:
     $16       element count
     $f19      scale factor on entry, copied to $f30 ("da" in the comments)
     $20, $21  read-only vector pointer and its stride
     0($sp)    on entry: pointer of the updated vector, loaded into $24
     8($sp)    on entry: stride of the updated vector, loaded into $23
   Nothing is done when the count is <= 0 or the scale factor is zero.
   PROLOGUE, PROFCODE, EPILOGUE, LD, ST, MUL, ADD, SIZE, SXADDQ and SXSUBL
   come from common.h, which is not part of this listing. */
#define ASSEMBLER
#include "common.h"
#include "version.h"
#define PREFETCHSIZE 40
PROLOGUE
PROFCODE
.frame $sp, 16, $26, 0
/* The two stack arguments are read before $sp is lowered. */
ldq $24, 0($sp)
fmov $f19, $f30
ldl $23, 8($sp)
lda $sp, -16($sp)
#ifndef PROFILE
.prologue 0
#else
.prologue 1
#endif
nop
/* $1 = count >> 3 (groups of eight), $2 = count & 7 (leftover),
   $3 = (both strides equal 1). $f2/$f3 are saved in the new frame before
   any branch, so every exit path can restore them. */
sra $16, 3, $1
stt $f2, 0($sp)
cmpeq $21, 1, $3
stt $f3, 8($sp)
cmpeq $23, 1, $4
and $16, 7, $2
ble $16, $End
and $3, $4, $3
fbeq $f30, $End
beq $3, $Sub
ble $1, $Remain
.align 4
/* Unit-stride path: preload eight source values into $f10-$f17 and the
   eight matching destination values into $f18-$f25. Only the source
   pointer is advanced here; $24 keeps pointing at the group that will be
   stored. */
LD $f10, 0*SIZE($20)
LD $f11, 1*SIZE($20)
LD $f12, 2*SIZE($20)
LD $f13, 3*SIZE($20)
LD $f18, 0*SIZE($24)
LD $f19, 1*SIZE($24)
LD $f20, 2*SIZE($24)
LD $f21, 3*SIZE($24)
LD $f14, 4*SIZE($20)
LD $f15, 5*SIZE($20)
LD $f16, 6*SIZE($20)
LD $f17, 7*SIZE($20)
LD $f22, 4*SIZE($24)
LD $f23, 5*SIZE($24)
LD $f24, 6*SIZE($24)
LD $f25, 7*SIZE($24)
subq $1, 1, $1
addq $20, 8*SIZE, $20
unop
ble $1, $LoopEnd
.align 4
/* Unit-stride main loop, eight elements per pass. Products go to
   $f26-$f29 and sums to $f0-$f3, in two halves of four. While the current
   group is computed and stored at offsets 0..7 of $24, the next group is
   loaded: source values at offsets 0..7 of the already advanced $20,
   destination values at offsets 8..15 of $24. The first two loads target
   $f31 and $31, so their values are discarded; presumably prefetch hints. */
$Loop:
ldt $f31, PREFETCHSIZE * SIZE($24)
ldl $31, PREFETCHSIZE * SIZE($20)
MUL $f30, $f10, $f26 # ctemp1 = da * atemp1
LD $f10, 0*SIZE($20)
MUL $f30, $f11, $f27
LD $f11, 1*SIZE($20)
MUL $f30, $f12, $f28
LD $f12, 2*SIZE($20)
MUL $f30, $f13, $f29
LD $f13, 3*SIZE($20)
ADD $f18, $f26, $f0
LD $f18, 8*SIZE($24)
MUL $f30, $f14, $f26 # ctemp1 = da * atemp1
LD $f14, 4*SIZE($20)
ADD $f19, $f27, $f1
LD $f19, 9*SIZE($24)
MUL $f30, $f15, $f27
LD $f15, 5*SIZE($20)
ADD $f20, $f28, $f2
LD $f20, 10*SIZE($24)
MUL $f30, $f16, $f28
LD $f16, 6*SIZE($20)
ADD $f21, $f29, $f3
LD $f21, 11*SIZE($24)
MUL $f30, $f17, $f29
LD $f17, 7*SIZE($20)
ST $f0, 0*SIZE($24)
ADD $f22, $f26, $f0
ST $f1, 1*SIZE($24)
ADD $f23, $f27, $f1
ST $f2, 2*SIZE($24)
ADD $f24, $f28, $f2
ST $f3, 3*SIZE($24)
ADD $f25, $f29, $f3
LD $f22, 12*SIZE($24)
LD $f23, 13*SIZE($24)
LD $f24, 14*SIZE($24)
LD $f25, 15*SIZE($24)
ST $f0, 4*SIZE($24)
ST $f1, 5*SIZE($24)
ST $f2, 6*SIZE($24)
ST $f3, 7*SIZE($24)
subq $1, 1, $1
addq $24, 8*SIZE, $24
addq $20, 8*SIZE, $20
bgt $1, $Loop
.align 4
/* Drain: compute and store the group already in registers, no new loads.
   Only $24 is advanced; $20 already points past this group. */
$LoopEnd:
MUL $f30, $f10, $f26 # ctemp1 = da * atemp1
MUL $f30, $f11, $f27
MUL $f30, $f12, $f28
MUL $f30, $f13, $f29
ADD $f18, $f26, $f0
MUL $f30, $f14, $f26 # ctemp1 = da * atemp1
ADD $f19, $f27, $f1
MUL $f30, $f15, $f27
ADD $f20, $f28, $f2
MUL $f30, $f16, $f28
ADD $f21, $f29, $f3
MUL $f30, $f17, $f29
ST $f0, 0*SIZE($24)
ADD $f22, $f26, $f0
ST $f1, 1*SIZE($24)
ADD $f23, $f27, $f1
ST $f2, 2*SIZE($24)
ADD $f24, $f28, $f2
ST $f3, 3*SIZE($24)
ADD $f25, $f29, $f3
ST $f0, 4*SIZE($24)
ST $f1, 5*SIZE($24)
ST $f2, 6*SIZE($24)
ST $f3, 7*SIZE($24)
addq $24, 8*SIZE, $24
.align 4
/* Unit-stride tail: $2 = count & 7 elements, one per pass. Both pointers
   are advanced before the store, hence the -1*SIZE store offset. */
$Remain:
ble $2, $End
.align 4
$RemainLoop:
LD $f10, 0*SIZE($20)
LD $f11, 0*SIZE($24)
addq $20, SIZE, $20
addq $24, SIZE, $24
MUL $f30, $f10, $f12
subq $2, 1, $2
ADD $f11, $f12, $f13
ST $f13, -1*SIZE($24)
bgt $2, $RemainLoop
.align 4
/* Exit for the unit-stride path and for the early-out cases: restore
   $f2/$f3, release the 16-byte frame and return. */
$End:
ldt $f2, 0($sp)
ldt $f3, 8($sp)
lda $sp, 16($sp)
ret
.align 4
/* General-stride path. $4 = $1 - 1 counts the groups still to be loaded
   after the preload. $22 is a read-ahead pointer into the updated vector;
   $24 stays behind as the store pointer.
   NOTE(review): the value SXSUBL writes to $22 is never read in this
   listing. $22 is reassigned by the first "SXADDQ $23, $24, $22" below,
   and the $SubRemain path does not use $22 at all. */
$Sub:
SXSUBL $16, SIZE, $22
subq $1, 1, $4
ble $1, $SubRemain
.align 4
/* Preload eight source values ($f10-$f17, stepping $20 by stride $21) and
   eight destination values ($f18-$f25, stepping $22 by stride $23). */
LD $f10, 0($20)
SXADDQ $21, $20, $20
LD $f11, 0($20)
SXADDQ $21, $20, $20
LD $f12, 0($20)
SXADDQ $21, $20, $20
LD $f13, 0($20)
SXADDQ $21, $20, $20
LD $f18, 0($24)
SXADDQ $23, $24, $22
LD $f19, 0($22)
SXADDQ $23, $22, $22
LD $f20, 0($22)
SXADDQ $23, $22, $22
LD $f21, 0($22)
SXADDQ $23, $22, $22
LD $f14, 0($20)
SXADDQ $21, $20, $20
LD $f15, 0($20)
SXADDQ $21, $20, $20
LD $f16, 0($20)
SXADDQ $21, $20, $20
LD $f17, 0($20)
SXADDQ $21, $20, $20
LD $f22, 0($22)
SXADDQ $23, $22, $22
LD $f23, 0($22)
SXADDQ $23, $22, $22
LD $f24, 0($22)
SXADDQ $23, $22, $22
LD $f25, 0($22)
SXADDQ $23, $22, $22
unop
ble $4, $SubLoopEnd
.align 4
/* General-stride main loop, eight elements per pass. The next eight source
   values are loaded through $20 while the products are formed. The first
   four results are stored through $24, then the next eight destination
   values are loaded through the read-ahead pointer $22, then the last four
   results are stored. */
$SubLoop:
MUL $f30, $f10, $f26 # ctemp1 = da * atemp1
LD $f10, 0($20)
unop
SXADDQ $21, $20, $20
MUL $f30, $f11, $f27
LD $f11, 0($20)
unop
SXADDQ $21, $20, $20
MUL $f30, $f12, $f28
LD $f12, 0($20)
unop
SXADDQ $21, $20, $20
MUL $f30, $f13, $f29
LD $f13, 0($20)
unop
SXADDQ $21, $20, $20
ADD $f18, $f26, $f0
MUL $f30, $f14, $f26 # ctemp1 = da * atemp1
LD $f14, 0($20)
SXADDQ $21, $20, $20
ADD $f19, $f27, $f1
MUL $f30, $f15, $f27
LD $f15, 0($20)
SXADDQ $21, $20, $20
ADD $f20, $f28, $f2
MUL $f30, $f16, $f28
LD $f16, 0($20)
SXADDQ $21, $20, $20
ADD $f21, $f29, $f3
MUL $f30, $f17, $f29
LD $f17, 0($20)
SXADDQ $21, $20, $20
ST $f0, 0($24)
SXADDQ $23, $24, $24
ADD $f22, $f26, $f0
unop
ST $f1, 0($24)
SXADDQ $23, $24, $24
ADD $f23, $f27, $f1
unop
ST $f2, 0($24)
SXADDQ $23, $24, $24
ADD $f24, $f28, $f2
unop
ST $f3, 0($24)
SXADDQ $23, $24, $24
ADD $f25, $f29, $f3
unop
LD $f18, 0($22)
SXADDQ $23, $22, $22
LD $f19, 0($22)
SXADDQ $23, $22, $22
LD $f20, 0($22)
SXADDQ $23, $22, $22
LD $f21, 0($22)
SXADDQ $23, $22, $22
LD $f22, 0($22)
SXADDQ $23, $22, $22
LD $f23, 0($22)
SXADDQ $23, $22, $22
LD $f24, 0($22)
SXADDQ $23, $22, $22
LD $f25, 0($22)
SXADDQ $23, $22, $22
ST $f0, 0($24)
SXADDQ $23, $24, $24
ST $f1, 0($24)
SXADDQ $23, $24, $24
ST $f2, 0($24)
SXADDQ $23, $24, $24
ST $f3, 0($24)
SXADDQ $23, $24, $24
subq $4, 1, $4
bgt $4, $SubLoop
.align 4
/* Drain: compute and store the group already in registers; nothing more is
   loaded. */
$SubLoopEnd:
MUL $f30, $f10, $f26 # ctemp1 = da * atemp1
MUL $f30, $f11, $f27
MUL $f30, $f12, $f28
MUL $f30, $f13, $f29
ADD $f18, $f26, $f0
MUL $f30, $f14, $f26 # ctemp1 = da * atemp1
ADD $f19, $f27, $f1
MUL $f30, $f15, $f27
ADD $f20, $f28, $f2
MUL $f30, $f16, $f28
ADD $f21, $f29, $f3
MUL $f30, $f17, $f29
ST $f0, 0($24)
SXADDQ $23, $24, $24
ST $f1, 0($24)
SXADDQ $23, $24, $24
ST $f2, 0($24)
SXADDQ $23, $24, $24
ST $f3, 0($24)
SXADDQ $23, $24, $24
ADD $f22, $f26, $f0
ADD $f23, $f27, $f1
ADD $f24, $f28, $f2
ADD $f25, $f29, $f3
ST $f0, 0($24)
SXADDQ $23, $24, $24
ST $f1, 0($24)
SXADDQ $23, $24, $24
ST $f2, 0($24)
SXADDQ $23, $24, $24
ST $f3, 0($24)
SXADDQ $23, $24, $24
.align 4
/* General-stride tail: $2 = count & 7 elements, one per pass, loading and
   storing through $24 directly. */
$SubRemain:
ble $2, $SubEnd
.align 4
$SubRemainLoop:
LD $f10, 0($20)
LD $f11, 0($24)
SXADDQ $21, $20, $20
MUL $f30, $f10, $f12
subq $2, 1, $2
ADD $f11, $f12, $f13
ST $f13, 0($24)
SXADDQ $23, $24, $24
bgt $2, $SubRemainLoop
.align 4
/* Exit for the general-stride path: restore $f2/$f3, release the 16-byte
   frame and return. */
$SubEnd:
ldt $f2, 0($sp)
ldt $f3, 8($sp)
lda $sp, 16($sp)
ret
EPILOGUE

71
kernel/alpha/cabs.S Normal file
View File

@@ -0,0 +1,71 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
/* Loads the two values stored at 0 and SIZE bytes from the pointer in $16
   and leaves the sum of their absolute values in $f0. No square root is
   taken. NAME, LD, ADD, SIZE and VERSION are supplied by the included
   headers, which are not part of this listing. */
#define ASSEMBLER
#include "common.h"
#include "version.h"
.set noat
.set noreorder
.text
.align 5
.globl NAME
.ent NAME
NAME:
.frame $sp, 0, $26, 0
/* Profiling builds set up $gp and call _mcount through $28 first. */
#ifdef PROFILE
ldgp $gp, 0($27)
lda $28, _mcount
jsr $28, ($28), _mcount
#endif
LD $f10, 0($16)
LD $f11, SIZE($16)
#ifndef PROFILE
.prologue 0
#else
.prologue 1
#endif
/* $f0 = |first value| + |second value| */
fabs $f10, $f12
fabs $f11, $f0
ADD $f12, $f0, $f0
ret
.end NAME
.ident VERSION

426
kernel/alpha/cnrm2.S Normal file
View File

@@ -0,0 +1,426 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
/* Square root of the sum of squares over a vector of value pairs (Alpha).
   Each element consists of two values at offsets 0 and 1 * SIZE. Squares
   are accumulated with addt/mult and the root is taken at $L998. For
   N <= 0 the code branches to $L999 with a0 ($f0) cleared.
   PROLOGUE, PROFCODE, EPILOGUE, LD, SIZE and ZBASE_SHIFT come from
   common.h, which is not part of this listing. */
#define ASSEMBLER
#include "common.h"
#include "version.h"
#define PREFETCH_SIZE 80
/* Integer registers. I shares $0 with the temporary used for the stride
   test below; the test result is consumed before I is first written. */
#define N $16
#define X $17
#define INCX $18
#define XX $19
#define I $0
/* a0-a3: four partial sums. t0-t3: squares waiting to be added. */
#define a0 $f0
#define a1 $f1
#define a2 $f10
#define a3 $f11
#define t0 $f12
#define t1 $f13
#define t2 $f14
#define t3 $f15
/* x0-x7: the eight values currently being squared. */
#define x0 $f16
#define x1 $f17
#define x2 $f18
#define x3 $f19
#define x4 $f20
#define x5 $f21
#define x6 $f22
#define x7 $f23
PROLOGUE
/* EV4/EV5 builds call the external sqrt routine at the end, so they set up
   $29 from $27, open a 16-byte frame, load the address of sqrt into $27
   and save the return address $26. Other builds use the sqrtt instruction
   and need no frame. */
#if defined(EV4) || defined(EV5)
.frame $30,16,$26,0
.mask 0x4000000,-16
ldah $29, 0($27) !gpdisp!1
lda $29, 0($29) !gpdisp!1
lda $sp, -16($sp)
ldq $27, sqrt($29) !literal!2
stq $26, 0($sp)
PROFCODE
.prologue 1
#else
PROFCODE
#endif
/* INCX is shifted left by ZBASE_SHIFT (presumably converting an element
   stride to bytes - confirm against common.h) and compared with 2 * SIZE.
   Anything other than that contiguous stride goes to $L20. */
fclr a0
sll INCX, ZBASE_SHIFT, INCX
fclr a1
ble N, $L999
fclr a2
cmpeq INCX, 2 * SIZE, $0
fclr a3
beq $0, $L20
/* Contiguous path: I = N >> 3 groups of eight elements (16 values). */
fclr t0
sra N, 3, I
fclr t1
ble I, $L15
/* Preload the first eight values of the first group. */
fclr t2
LD x0, 0 * SIZE(X)
fclr t3
LD x1, 1 * SIZE(X)
LD x2, 2 * SIZE(X)
LD x3, 3 * SIZE(X)
LD x4, 4 * SIZE(X)
LD x5, 5 * SIZE(X)
LD x6, 6 * SIZE(X)
LD x7, 7 * SIZE(X)
lda I, -1(I)
ble I, $L12
.align 4
/* Contiguous main loop: 16 values per pass, as two rounds over x0-x7.
   Every addt folds in the square produced one step earlier. XX keeps the
   value X had at the top of the pass, so after X is advanced by 16 * SIZE
   the offsets 17..23 from XX continue the same stream. "ldl $31, ..."
   loads into register $31, so the value is discarded; presumably a
   prefetch hint. */
$L11:
addt a0, t0, a0
ldl $31, (PREFETCH_SIZE) * SIZE(X)
mult x0, x0, t0
LD x0, 8 * SIZE(X)
addt a1, t1, a1
mov X, XX
mult x1, x1, t1
LD x1, 9 * SIZE(X)
addt a2, t2, a2
unop
mult x2, x2, t2
LD x2, 10 * SIZE(X)
addt a3, t3, a3
unop
mult x3, x3, t3
LD x3, 11 * SIZE(X)
addt a0, t0, a0
unop
mult x4, x4, t0
LD x4, 12 * SIZE(X)
addt a1, t1, a1
unop
mult x5, x5, t1
LD x5, 13 * SIZE(X)
addt a2, t2, a2
unop
mult x6, x6, t2
LD x6, 14 * SIZE(X)
addt a3, t3, a3
unop
mult x7, x7, t3
LD x7, 15 * SIZE(X)
addt a0, t0, a0
unop
mult x0, x0, t0
LD x0, 16 * SIZE(X)
addt a1, t1, a1
lda X, 16 * SIZE(X)
mult x1, x1, t1
LD x1, 17 * SIZE(XX)
addt a2, t2, a2
unop
mult x2, x2, t2
LD x2, 18 * SIZE(XX)
addt a3, t3, a3
unop
mult x3, x3, t3
LD x3, 19 * SIZE(XX)
addt a0, t0, a0
unop
mult x4, x4, t0
LD x4, 20 * SIZE(XX)
addt a1, t1, a1
lda I, -1(I)
mult x5, x5, t1
LD x5, 21 * SIZE(XX)
addt a2, t2, a2
unop
mult x6, x6, t2
LD x6, 22 * SIZE(XX)
addt a3, t3, a3
mult x7, x7, t3
LD x7, 23 * SIZE(XX)
bgt I, $L11
.align 4
/* Drain the last full group: load its second eight values (offsets 8..15),
   advance X past the group, and fold everything in without further loads.
   On exit t0 and t1 still hold pending squares; they are added at $L16 or
   at $L998. */
$L12:
addt a0, t0, a0
mov X, XX
mult x0, x0, t0
LD x0, 8 * SIZE(X)
addt a1, t1, a1
unop
mult x1, x1, t1
LD x1, 9 * SIZE(X)
addt a2, t2, a2
unop
mult x2, x2, t2
LD x2, 10 * SIZE(X)
addt a3, t3, a3
unop
mult x3, x3, t3
LD x3, 11 * SIZE(X)
addt a0, t0, a0
unop
mult x4, x4, t0
LD x4, 12 * SIZE(XX)
addt a1, t1, a1
unop
mult x5, x5, t1
LD x5, 13 * SIZE(XX)
addt a2, t2, a2
unop
mult x6, x6, t2
LD x6, 14 * SIZE(XX)
addt a3, t3, a3
lda X, 16 * SIZE(X)
mult x7, x7, t3
LD x7, 15 * SIZE(XX)
addt a0, t0, a0
mult x0, x0, t0
addt a1, t1, a1
mult x1, x1, t1
addt a2, t2, a2
mult x2, x2, t2
addt a3, t3, a3
mult x3, x3, t3
addt a0, t0, a0
mult x4, x4, t0
addt a1, t1, a1
mult x5, x5, t1
addt a2, t2, a2
mult x6, x6, t2
addt a3, t3, a3
mult x7, x7, t3
addt a2, t2, a2
addt a3, t3, a3
.align 4
/* Contiguous tail: I = N & 7 elements left, one element (two values) per
   pass. Each pass first adds the squares still pending in t0/t1, then
   produces two new ones; the last pair is added at $L998. */
$L15:
and N, 7, I
ble I, $L998
.align 4
$L16:
LD x0, 0 * SIZE(X)
LD x1, 1 * SIZE(X)
lda X, 2 * SIZE(X)
addt a0, t0, a0
mult x0, x0, t0
addt a1, t1, a1
mult x1, x1, t1
lda I, -1(I)
bgt I, $L16
/* Skip the strided path. This is a plain jump, so it uses br rather than
   bsr: both discard the return address into $31, but bsr is hinted as a
   subroutine call and would push an entry on the return-prediction stack
   that no ret matches. */
br $31, $L998
.align 4
/* Strided path: I = N >> 2 groups of four elements. X advances by INCX
   after each element. The preload covers x0..x6; x7, the second value of
   the fourth element, is loaded at the top of $L21 / $L22. */
$L20:
fclr t0
sra N, 2, I
fclr t1
ble I, $L25
LD x0, 0 * SIZE(X)
fclr t2
LD x1, 1 * SIZE(X)
addq X, INCX, X
LD x2, 0 * SIZE(X)
fclr t3
LD x3, 1 * SIZE(X)
addq X, INCX, X
LD x4, 0 * SIZE(X)
lda I, -1(I)
LD x5, 1 * SIZE(X)
addq X, INCX, X
LD x6, 0 * SIZE(X)
ble I, $L22
.align 4
/* Strided main loop: four elements (eight values) per pass. Every addt
   folds in the square produced one step earlier, while x0..x6 are reloaded
   for the next group. */
$L21:
addt a0, t0, a0
LD x7, 1 * SIZE(X)
mult x0, x0, t0
addq X, INCX, X
addt a1, t1, a1
LD x0, 0 * SIZE(X)
mult x1, x1, t1
unop
addt a2, t2, a2
LD x1, 1 * SIZE(X)
mult x2, x2, t2
addq X, INCX, X
addt a3, t3, a3
LD x2, 0 * SIZE(X)
mult x3, x3, t3
unop
addt a0, t0, a0
LD x3, 1 * SIZE(X)
mult x4, x4, t0
addq X, INCX, X
addt a1, t1, a1
LD x4, 0 * SIZE(X)
mult x5, x5, t1
lda I, -1(I)
addt a2, t2, a2
LD x5, 1 * SIZE(X)
mult x6, x6, t2
addq X, INCX, X
addt a3, t3, a3
LD x6, 0 * SIZE(X)
mult x7, x7, t3
bgt I, $L21
.align 4
/* Drain the last group: load its x7, step X past it, and fold everything
   in. On exit t0 and t1 still hold pending squares; they are added at
   $L26 or at $L998. */
$L22:
addt a0, t0, a0
LD x7, 1 * SIZE(X)
mult x0, x0, t0
addq X, INCX, X
addt a1, t1, a1
mult x1, x1, t1
addt a2, t2, a2
mult x2, x2, t2
addt a3, t3, a3
mult x3, x3, t3
addt a0, t0, a0
mult x4, x4, t0
addt a1, t1, a1
mult x5, x5, t1
addt a2, t2, a2
mult x6, x6, t2
addt a3, t3, a3
mult x7, x7, t3
addt a2, t2, a2
addt a3, t3, a3
.align 4
/* Strided tail: I = N & 3 elements left, one element per pass. */
$L25:
and N, 3, I
ble I, $L998
.align 4
$L26:
LD x0, 0 * SIZE(X)
lda I, -1(I)
LD x1, 1 * SIZE(X)
addq X, INCX, X
addt a0, t0, a0
mult x0, x0, t0
addt a1, t1, a1
mult x1, x1, t1
bgt I, $L26
.align 4
/* Final reduction: add the pending t0/t1, combine the four partial sums,
   and take the square root. EV4/EV5 builds pass the total in $f16 and call
   sqrt through $27, then re-establish $29 from the return address in $26.
   Other builds use sqrtt and leave the result in a0 ($f0). */
$L998:
addt a0, t0, a0
addt a1, t1, a1
addt a0, a1, a0
addt a2, a3, a2
#if defined(EV4) || defined(EV5)
addt a0, a2, $f16
jsr $26, ($27), sqrt !lituse_jsr!2
ldah $29, 0($26) !gpdisp!3
lda $29, 0($29) !gpdisp!3
#else
addt a0, a2, a0
sqrtt a0, a0
#endif
.align 4
/* Exit. EV4/EV5 builds reload the saved return address and release the
   16-byte frame opened in the prologue. */
$L999:
#if defined(EV4) || defined(EV5)
ldq $26, 0($sp)
lda $sp, 16($sp)
#endif
ret
EPILOGUE

379
kernel/alpha/copy.S Normal file
View File

@@ -0,0 +1,379 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
#define N $16
#define X $17
#define INCX $18
#define Y $19
#define INCY $20
PROLOGUE
PROFCODE
.frame $sp, 0, $26, 0
#ifndef PROFILE
.prologue 0
#else
.prologue 1
#endif
cmpeq INCX, 1, $0
ble N, $End
#ifndef COMPLEX
sra N, 4, $4
#else
sra N, 3, $4
#endif
cmpeq INCY, 1, $1
and $0, $1, $0
beq $0, $Sub
#ifndef COMPLEX
and N, 15, $5
#else
and N, 7, $5
#endif
ble $4, $Remain
LD $f10, 0*SIZE(X)
LD $f11, 1*SIZE(X)
LD $f12, 2*SIZE(X)
LD $f13, 3*SIZE(X)
LD $f14, 4*SIZE(X)
LD $f15, 5*SIZE(X)
LD $f16, 6*SIZE(X)
LD $f17, 7*SIZE(X)
LD $f18, 8*SIZE(X)
LD $f19, 9*SIZE(X)
LD $f20, 10*SIZE(X)
LD $f21, 11*SIZE(X)
LD $f22, 12*SIZE(X)
LD $f23, 13*SIZE(X)
LD $f24, 14*SIZE(X)
LD $f25, 15*SIZE(X)
subq $4, 1, $4
lda X, 16*SIZE(X)
ble $4, $MainLoopEnd
.align 4
$MainLoop:
ST $f10, 0*SIZE(Y)
ST $f11, 1*SIZE(Y)
ST $f12, 2*SIZE(Y)
ST $f13, 3*SIZE(Y)
LD $f10, 0*SIZE(X)
LD $f11, 1*SIZE(X)
LD $f12, 2*SIZE(X)
LD $f13, 3*SIZE(X)
ST $f14, 4*SIZE(Y)
ST $f15, 5*SIZE(Y)
ST $f16, 6*SIZE(Y)
ST $f17, 7*SIZE(Y)
LD $f14, 4*SIZE(X)
LD $f15, 5*SIZE(X)
LD $f16, 6*SIZE(X)
LD $f17, 7*SIZE(X)
ST $f18, 8*SIZE(Y)
ST $f19, 9*SIZE(Y)
ST $f20, 10*SIZE(Y)
ST $f21, 11*SIZE(Y)
LD $f18, 8*SIZE(X)
LD $f19, 9*SIZE(X)
LD $f20, 10*SIZE(X)
LD $f21, 11*SIZE(X)
ST $f22, 12*SIZE(Y)
ST $f23, 13*SIZE(Y)
ST $f24, 14*SIZE(Y)
ST $f25, 15*SIZE(Y)
LD $f22, 12*SIZE(X)
LD $f23, 13*SIZE(X)
LD $f24, 14*SIZE(X)
LD $f25, 15*SIZE(X)
subq $4, 1, $4
lda Y, 16*SIZE(Y)
lda X, 16*SIZE(X)
bgt $4, $MainLoop
.align 4
$MainLoopEnd:
ST $f10, 0*SIZE(Y)
ST $f11, 1*SIZE(Y)
ST $f12, 2*SIZE(Y)
ST $f13, 3*SIZE(Y)
ST $f14, 4*SIZE(Y)
ST $f15, 5*SIZE(Y)
ST $f16, 6*SIZE(Y)
ST $f17, 7*SIZE(Y)
ST $f18, 8*SIZE(Y)
ST $f19, 9*SIZE(Y)
ST $f20, 10*SIZE(Y)
ST $f21, 11*SIZE(Y)
ST $f22, 12*SIZE(Y)
ST $f23, 13*SIZE(Y)
ST $f24, 14*SIZE(Y)
ST $f25, 15*SIZE(Y)
lda Y, 16*SIZE(Y)
.align 4
/* Unit-stride tail: copy the $5 leftover elements one at a time. */
$Remain:
ble $5, $End
.align 4
$RemainLoop:
#ifndef COMPLEX
/* Real data: one value per element. */
LD $f10, 0*SIZE(X)
lda X, 1*SIZE(X)
ST $f10, 0*SIZE(Y)
lda Y, 1*SIZE(Y)
#else
/* COMPLEX data: two consecutive values per element. */
LD $f10, 0*SIZE(X)
LD $f11, 1*SIZE(X)
lda X, 2*SIZE(X)
ST $f10, 0*SIZE(Y)
ST $f11, 1*SIZE(Y)
lda Y, 2*SIZE(Y)
#endif
subq $5, 1, $5
bgt $5, $RemainLoop
.align 4
/* Common exit for the unit-stride path and for N <= 0. */
$End:
ret
.align 4
/* Strided path setup, reached when INCX or INCY differs from 1.
   $4 still holds the unrolled pass count computed at entry. */
$Sub:
#ifdef COMPLEX
/* Double both strides so one step spans a two-value complex element. */
addq INCX, INCX, INCX
addq INCY, INCY, INCY
and N, 7, $5
#else
and N, 15, $5
#endif
/* $5 = leftover element count; skip the unrolled loop when no full pass exists. */
ble $4, $SubRemain
.align 4
/* Strided unrolled loop: each pass gathers 16 values into $f10-$f25, then
   scatters them to Y. SXADDQ is a common.h macro; here it steps the pointer by
   the stride (presumably scaled by the element size - confirm in common.h). */
$SubMainLoop:
#ifndef COMPLEX
/* Real data: 16 single-value loads, stepping X by INCX after each. */
LD $f10, 0(X)
SXADDQ INCX, X, X
LD $f11, 0(X)
SXADDQ INCX, X, X
LD $f12, 0(X)
SXADDQ INCX, X, X
LD $f13, 0(X)
SXADDQ INCX, X, X
LD $f14, 0(X)
SXADDQ INCX, X, X
LD $f15, 0(X)
SXADDQ INCX, X, X
LD $f16, 0(X)
SXADDQ INCX, X, X
LD $f17, 0(X)
SXADDQ INCX, X, X
LD $f18, 0(X)
SXADDQ INCX, X, X
LD $f19, 0(X)
SXADDQ INCX, X, X
LD $f20, 0(X)
SXADDQ INCX, X, X
LD $f21, 0(X)
SXADDQ INCX, X, X
LD $f22, 0(X)
SXADDQ INCX, X, X
LD $f23, 0(X)
SXADDQ INCX, X, X
LD $f24, 0(X)
SXADDQ INCX, X, X
LD $f25, 0(X)
SXADDQ INCX, X, X
/* Matching 16 stores, stepping Y by INCY after each. */
ST $f10, 0(Y)
SXADDQ INCY, Y, Y
ST $f11, 0(Y)
SXADDQ INCY, Y, Y
ST $f12, 0(Y)
SXADDQ INCY, Y, Y
ST $f13, 0(Y)
SXADDQ INCY, Y, Y
ST $f14, 0(Y)
SXADDQ INCY, Y, Y
ST $f15, 0(Y)
SXADDQ INCY, Y, Y
ST $f16, 0(Y)
SXADDQ INCY, Y, Y
ST $f17, 0(Y)
SXADDQ INCY, Y, Y
ST $f18, 0(Y)
SXADDQ INCY, Y, Y
ST $f19, 0(Y)
SXADDQ INCY, Y, Y
ST $f20, 0(Y)
SXADDQ INCY, Y, Y
ST $f21, 0(Y)
SXADDQ INCY, Y, Y
ST $f22, 0(Y)
SXADDQ INCY, Y, Y
ST $f23, 0(Y)
SXADDQ INCY, Y, Y
ST $f24, 0(Y)
SXADDQ INCY, Y, Y
ST $f25, 0(Y)
SXADDQ INCY, Y, Y
#else
/* COMPLEX data: 8 elements per pass, two adjacent values per element. */
LD $f10, 0(X)
LD $f11, SIZE(X)
SXADDQ INCX, X, X
LD $f12, 0(X)
LD $f13, SIZE(X)
SXADDQ INCX, X, X
LD $f14, 0(X)
LD $f15, SIZE(X)
SXADDQ INCX, X, X
LD $f16, 0(X)
LD $f17, SIZE(X)
SXADDQ INCX, X, X
LD $f18, 0(X)
LD $f19, SIZE(X)
SXADDQ INCX, X, X
LD $f20, 0(X)
LD $f21, SIZE(X)
SXADDQ INCX, X, X
LD $f22, 0(X)
LD $f23, SIZE(X)
SXADDQ INCX, X, X
LD $f24, 0(X)
LD $f25, SIZE(X)
SXADDQ INCX, X, X
/* Matching 8 two-value stores. */
ST $f10, 0(Y)
ST $f11, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f12, 0(Y)
ST $f13, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f14, 0(Y)
ST $f15, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f16, 0(Y)
ST $f17, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f18, 0(Y)
ST $f19, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f20, 0(Y)
ST $f21, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f22, 0(Y)
ST $f23, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f24, 0(Y)
ST $f25, SIZE(Y)
SXADDQ INCY, Y, Y
#endif
/* Next pass while $4 > 0. */
subq $4, 1, $4
bgt $4, $SubMainLoop
.align 4
/* Strided tail: copy the $5 leftover elements one at a time. */
$SubRemain:
ble $5, $SubEnd
.align 4
$SubRemainLoop:
#ifndef COMPLEX
/* Real data: one value per element. */
LD $f10, 0(X)
SXADDQ INCX, X, X
ST $f10, 0(Y)
SXADDQ INCY, Y, Y
#else
/* COMPLEX data: two adjacent values per element. */
LD $f10, 0(X)
LD $f11, SIZE(X)
SXADDQ INCX, X, X
ST $f10, 0(Y)
ST $f11, SIZE(Y)
SXADDQ INCY, Y, Y
#endif
subq $5, 1, $5
bgt $5, $SubRemainLoop
.align 4
/* Exit for the strided path. */
$SubEnd:
ret
EPILOGUE

217
kernel/alpha/cscal.S Normal file
View File

@@ -0,0 +1,217 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
.set noat
.set noreorder
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Scale kernel entry. Every stored value is the loaded value multiplied by the
   single scalar in $f1, and values are handled in pairs, i.e. a complex vector
   scaled by a real factor (MUL/LD/ST/SIZE are common.h macros).
   Registers: $16 = n, $18 = read pointer, $20 = write pointer (same vector),
   $19 = incx, $f1 = alpha. */
.globl NAME
.ent NAME
NAME:
#ifdef PROFILE
ldgp $gp, 0($27)
lda $28, _mcount
jsr $28, ($28), _mcount
#endif
#ifndef C_INTERFACE
/* Default interface: n, alpha and incx arrive by reference and are loaded here. */
ldl $16, 0($16) # n
mov $18, $20 # Store Address
ldl $19, 0($19) # incx
nop
LD $f1, 0($17) # alpha
#else
/* C interface: n and incx are already values; alpha arrives in $f17. */
mov $18, $20 # Store Address
fmov $f17, $f1 # alpha
#endif
#ifndef PROFILE
.prologue 0
#else
.prologue 1
#endif
/* $21 = n / 2: each unrolled pass handles two elements (four values). */
sra $16, 1, $21 # 4-unrolling
ble $16, $End
/* Return when incx <= 0; take the strided path when incx > 1. */
lda $23, -1($19)
ble $19, $End
bgt $23, $INC_NOT_1
.align 4
/* Unit-stride path: with no full pair, go straight to the odd-element tail. */
ble $21, $Sub
lda $21, -1($21)
/* Preload the first pair (four values) and advance the read pointer. */
LD $f10, 0*SIZE($18)
LD $f11, 1*SIZE($18)
LD $f12, 2*SIZE($18)
LD $f13, 3*SIZE($18)
lda $18, 4*SIZE($18)
ble $21, $MainRemain
.align 4
/* Unit-stride steady state: multiply the four values loaded on the previous
   pass by alpha ($f1) while loading the next four, then store the products. */
$MainLoop:
MUL $f10, $f1, $f20
LD $f10, 0*SIZE($18)
MUL $f11, $f1, $f21
LD $f11, 1*SIZE($18)
MUL $f12, $f1, $f22
LD $f12, 2*SIZE($18)
MUL $f13, $f1, $f23
LD $f13, 3*SIZE($18)
lda $18, 4*SIZE($18)
lda $21, -1($21)
ST $f20, 0*SIZE($20)
ST $f21, 1*SIZE($20)
ST $f22, 2*SIZE($20)
ST $f23, 3*SIZE($20)
lda $20, 4*SIZE($20)
bgt $21, $MainLoop
.align 4
/* Pipeline drain: scale and store the last preloaded group of four values. */
$MainRemain:
MUL $f10, $f1, $f20
MUL $f11, $f1, $f21
MUL $f12, $f1, $f22
MUL $f13, $f1, $f23
ST $f20, 0*SIZE($20)
ST $f21, 1*SIZE($20)
ST $f22, 2*SIZE($20)
ST $f23, 3*SIZE($20)
lda $20, 4*SIZE($20)
.align 4
/* Unit-stride tail: when n is odd (low bit of $16 set), scale the one
   remaining element (two values). */
$Sub:
blbc $16, $End
LD $f10, 0*SIZE($18)
LD $f11, 1*SIZE($18)
MUL $f10, $f1, $f20
MUL $f11, $f1, $f21
ST $f20, 0*SIZE($20)
ST $f21, 1*SIZE($20)
.align 4
/* Exit for the unit-stride path and for the n <= 0 / incx <= 0 early outs. */
$End:
ret
.align 4
/* Strided path (incx > 1). The stride is doubled so one step spans a
   two-value element; SXADDQ (common.h) then advances the pointer by it. */
$INC_NOT_1:
addl $19, $19, $19
ble $21, $INC_Sub
lda $21, -1($21)
/* Preload two elements (four values), stepping the read pointer after each. */
LD $f10, 0*SIZE($18)
LD $f11, 1*SIZE($18)
SXADDQ $19, $18, $18
LD $f12, 0*SIZE($18)
LD $f13, 1*SIZE($18)
SXADDQ $19, $18, $18
ble $21, $INC_MainRemain
.align 4
/* Strided steady state: scale the two elements loaded on the previous pass
   while loading the next two; $18 is the read pointer, $20 the write pointer. */
$INC_MainLoop:
MUL $f10, $f1, $f20
LD $f10, 0*SIZE($18)
MUL $f11, $f1, $f21
LD $f11, 1*SIZE($18)
SXADDQ $19, $18, $18
MUL $f12, $f1, $f22
LD $f12, 0*SIZE($18)
MUL $f13, $f1, $f23
LD $f13, 1*SIZE($18)
SXADDQ $19, $18, $18
ST $f20, 0*SIZE($20)
lda $21, -1($21)
ST $f21, 1*SIZE($20)
SXADDQ $19, $20, $20
ST $f22, 0*SIZE($20)
ST $f23, 1*SIZE($20)
SXADDQ $19, $20, $20
unop
bgt $21, $INC_MainLoop
.align 4
/* Pipeline drain: scale and store the last two preloaded elements. */
$INC_MainRemain:
MUL $f10, $f1, $f20
MUL $f11, $f1, $f21
MUL $f12, $f1, $f22
MUL $f13, $f1, $f23
ST $f20, 0*SIZE($20)
ST $f21, 1*SIZE($20)
SXADDQ $19, $20, $20
ST $f22, 0*SIZE($20)
ST $f23, 1*SIZE($20)
SXADDQ $19, $20, $20
.align 4
/* Strided tail: when n is odd (low bit of $16 set), scale the one remaining
   element (two values). */
$INC_Sub:
blbc $16, $INC_End
LD $f10, 0*SIZE($18)
LD $f11, 1*SIZE($18)
MUL $f10, $f1, $f20
MUL $f11, $f1, $f21
ST $f20, 0*SIZE($20)
ST $f21, 1*SIZE($20)
.align 4
/* Exit for the strided path. */
$INC_End:
ret
.end NAME
.ident VERSION

431
kernel/alpha/dnrm2.S Normal file
View File

@@ -0,0 +1,431 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Prefetch distance, in elements, used by the unit-stride main loop. */
#define PREFETCH_SIZE 80
/* Argument registers; XX is a scratch copy of X used inside the main loop. */
#define N $16
#define X $17
#define INCX $18
#define XX $19
/* I = loop counter. */
#define I $0
/* a0-a3: four partial sums of squares (a0 also carries the result). */
#define a0 $f0
#define a1 $f1
#define a2 $f10
#define a3 $f11
/* t0-t3: products waiting to be added into a0-a3 on a later step. */
#define t0 $f12
#define t1 $f13
#define t2 $f14
#define t3 $f15
/* x0-x7: loaded vector elements. */
#define x0 $f16
#define x1 $f17
#define x2 $f18
#define x3 $f19
#define x4 $f20
#define x5 $f21
#define x6 $f22
#define x7 $f23
/* Norm kernel entry: accumulates the sum of x[i]*x[i] and returns its square root. */
PROLOGUE
#if defined(EV4) || defined(EV5)
/* EV4/EV5 build: the square root is obtained by calling sqrt, so set up $gp,
   reserve a 16-byte frame, save the return address $26 and fetch sqrt's address. */
.frame $30,16,$26,0
.mask 0x4000000,-16
ldah $29, 0($27) !gpdisp!1
lda $29, 0($29) !gpdisp!1
lda $sp, -16($sp)
ldq $27, sqrt($29) !literal!2
stq $26, 0($sp)
PROFCODE
.prologue 1
#else
PROFCODE
#endif
/* Clear the partial sums. INCX is rescaled with SXADDQ (presumably to a byte
   stride, since it is compared with SIZE below and added directly to X - confirm
   in common.h). N <= 0 returns with a0 == 0. */
fclr a0
SXADDQ INCX, 0, INCX
fclr a1
ble N, $L999
/* Non-contiguous data is handled by the strided path at $L20. */
fclr a2
cmpeq INCX, SIZE, $0
fclr a3
beq $0, $L20
/* Contiguous path: I = N / 16 unrolled passes. */
fclr t0
sra N, 4, I
fclr t1
ble I, $L15
/* Preload the first eight elements. */
fclr t2
LD x0, 0 * SIZE(X)
fclr t3
LD x1, 1 * SIZE(X)
LD x2, 2 * SIZE(X)
LD x3, 3 * SIZE(X)
LD x4, 4 * SIZE(X)
LD x5, 5 * SIZE(X)
LD x6, 6 * SIZE(X)
LD x7, 7 * SIZE(X)
/* One pass is now in flight; skip the steady-state loop if it is the only one. */
lda I, -1(I)
ble I, $L12
.align 4
/* Contiguous steady state, 16 elements per pass. Each four-instruction group
   folds the previous product into a partial sum, squares an already-loaded
   element and loads a replacement. The unop slots are scheduling padding. */
$L11:
addt a0, t0, a0
/* A load into $31 discards its result, so this only touches the line ahead. */
ldl $31, (PREFETCH_SIZE) * SIZE(X)
mult x0, x0, t0
LD x0, 8 * SIZE(X)
addt a1, t1, a1
/* XX keeps the pre-increment base so later loads stay valid after X moves. */
mov X, XX
mult x1, x1, t1
LD x1, 9 * SIZE(X)
addt a2, t2, a2
unop
mult x2, x2, t2
LD x2, 10 * SIZE(X)
addt a3, t3, a3
unop
mult x3, x3, t3
LD x3, 11 * SIZE(X)
addt a0, t0, a0
unop
mult x4, x4, t0
LD x4, 12 * SIZE(X)
addt a1, t1, a1
unop
mult x5, x5, t1
LD x5, 13 * SIZE(X)
addt a2, t2, a2
unop
mult x6, x6, t2
LD x6, 14 * SIZE(X)
addt a3, t3, a3
unop
mult x7, x7, t3
LD x7, 15 * SIZE(X)
/* Second half of the pass: squares elements 8-15 and loads the first eight of
   the next pass (offsets 16-23 from the old base). */
addt a0, t0, a0
unop
mult x0, x0, t0
LD x0, 16 * SIZE(X)
addt a1, t1, a1
/* X advances here; the remaining loads address through XX (the old X). */
lda X, 16 * SIZE(X)
mult x1, x1, t1
LD x1, 17 * SIZE(XX)
addt a2, t2, a2
unop
mult x2, x2, t2
LD x2, 18 * SIZE(XX)
addt a3, t3, a3
unop
mult x3, x3, t3
LD x3, 19 * SIZE(XX)
addt a0, t0, a0
unop
mult x4, x4, t0
LD x4, 20 * SIZE(XX)
addt a1, t1, a1
lda I, -1(I)
mult x5, x5, t1
LD x5, 21 * SIZE(XX)
addt a2, t2, a2
unop
mult x6, x6, t2
LD x6, 22 * SIZE(XX)
addt a3, t3, a3
mult x7, x7, t3
LD x7, 23 * SIZE(XX)
bgt I, $L11
.align 4
/* Final contiguous pass: square the eight elements in flight while loading the
   last eight (offsets 8-15), then square those with no further loads. */
$L12:
addt a0, t0, a0
mov X, XX
mult x0, x0, t0
LD x0, 8 * SIZE(X)
addt a1, t1, a1
unop
mult x1, x1, t1
LD x1, 9 * SIZE(X)
addt a2, t2, a2
unop
mult x2, x2, t2
LD x2, 10 * SIZE(X)
addt a3, t3, a3
unop
mult x3, x3, t3
LD x3, 11 * SIZE(X)
addt a0, t0, a0
unop
mult x4, x4, t0
LD x4, 12 * SIZE(XX)
addt a1, t1, a1
unop
mult x5, x5, t1
LD x5, 13 * SIZE(XX)
addt a2, t2, a2
unop
mult x6, x6, t2
LD x6, 14 * SIZE(XX)
addt a3, t3, a3
/* X moves past this 16-element group; the last load uses XX (the old X). */
lda X, 16 * SIZE(X)
mult x7, x7, t3
LD x7, 15 * SIZE(XX)
/* Drain: square the final eight loaded elements. */
addt a0, t0, a0
mult x0, x0, t0
addt a1, t1, a1
mult x1, x1, t1
addt a2, t2, a2
mult x2, x2, t2
addt a3, t3, a3
mult x3, x3, t3
addt a0, t0, a0
mult x4, x4, t0
addt a1, t1, a1
mult x5, x5, t1
addt a2, t2, a2
mult x6, x6, t2
addt a3, t3, a3
mult x7, x7, t3
/* Fold t1-t3 now; t0 stays pending and is added by the code that follows. */
addt a1, t1, a1
addt a2, t2, a2
addt a3, t3, a3
.align 4
/* Contiguous tail: handle the N mod 16 elements left after the unrolled passes.
   t0 holds a pending product on entry (or zero) and again on exit. */
$L15:
and N, 15, I
ble I, $L998
.align 4
$L16:
LD x0, 0 * SIZE(X)
lda X, 1 * SIZE(X)
/* Fold the previous product, then square the element just loaded. */
addt a0, t0, a0
mult x0, x0, t0
lda I, -1(I)
bgt I, $L16
/* Plain jump to the final reduction. br is used rather than bsr: the two
   transfer control identically, but bsr carries a subroutine-call prediction
   hint and would push a return address that no ret ever consumes. */
br $31, $L998
.align 4
/* Strided path setup: I = N / 8 unrolled passes; X is stepped by INCX
   after every load. */
$L20:
fclr t0
sra N, 3, I
fclr t1
ble I, $L25
fclr t2
fclr t3
/* Preload seven elements (x0-x6); x7 is loaded at the top of each pass. */
LD x0, 0 * SIZE(X)
addq X, INCX, X
LD x1, 0 * SIZE(X)
addq X, INCX, X
LD x2, 0 * SIZE(X)
addq X, INCX, X
LD x3, 0 * SIZE(X)
addq X, INCX, X
LD x4, 0 * SIZE(X)
addq X, INCX, X
LD x5, 0 * SIZE(X)
addq X, INCX, X
LD x6, 0 * SIZE(X)
addq X, INCX, X
/* One pass is now in flight; skip the steady-state loop if it is the only one. */
lda I, -1(I)
ble I, $L22
.align 4
/* Strided steady state, eight elements per pass: fold the previous product
   into a partial sum, square a loaded element, and load its replacement. */
$L21:
addt a0, t0, a0
/* x7 completes the current group of eight; every later load belongs to the next. */
LD x7, 0 * SIZE(X)
mult x0, x0, t0
addq X, INCX, X
addt a1, t1, a1
LD x0, 0 * SIZE(X)
mult x1, x1, t1
addq X, INCX, X
addt a2, t2, a2
LD x1, 0 * SIZE(X)
mult x2, x2, t2
addq X, INCX, X
addt a3, t3, a3
LD x2, 0 * SIZE(X)
mult x3, x3, t3
addq X, INCX, X
addt a0, t0, a0
LD x3, 0 * SIZE(X)
mult x4, x4, t0
addq X, INCX, X
addt a1, t1, a1
LD x4, 0 * SIZE(X)
mult x5, x5, t1
addq X, INCX, X
addt a2, t2, a2
LD x5, 0 * SIZE(X)
mult x6, x6, t2
addq X, INCX, X
addt a3, t3, a3
LD x6, 0 * SIZE(X)
mult x7, x7, t3
addq X, INCX, X
lda I, -1(I)
bgt I, $L21
.align 4
/* Final strided pass: load the last element of the group (x7), then square all
   eight elements in flight with no further loads. */
$L22:
addt a0, t0, a0
LD x7, 0 * SIZE(X)
mult x0, x0, t0
addq X, INCX, X
addt a1, t1, a1
unop
mult x1, x1, t1
unop
addt a2, t2, a2
mult x2, x2, t2
addt a3, t3, a3
mult x3, x3, t3
addt a0, t0, a0
mult x4, x4, t0
addt a1, t1, a1
mult x5, x5, t1
addt a2, t2, a2
mult x6, x6, t2
addt a3, t3, a3
mult x7, x7, t3
/* Fold t1-t3 now; t0 stays pending and is added by the code that follows. */
addt a1, t1, a1
addt a2, t2, a2
addt a3, t3, a3
.align 4
/* Strided tail: handle the N mod 8 elements left after the unrolled passes,
   then fall through to the final reduction. */
$L25:
and N, 7, I
ble I, $L998
.align 4
$L26:
LD x0, 0 * SIZE(X)
addq X, INCX, X
/* Fold the previous product, then square the element just loaded. */
addt a0, t0, a0
mult x0, x0, t0
lda I, -1(I)
bgt I, $L26
.align 4
/* Final reduction: add the pending product t0, combine the four partial sums
   and take the square root. */
$L998:
addt a0, t0, a0
addt a0, a1, a0
addt a2, a3, a2
#if defined(EV4) || defined(EV5)
/* EV4/EV5 build: pass the total in $f16 to sqrt (address loaded into $27 at
   entry), then rebuild $gp from the return address. */
addt a0, a2, $f16
jsr $26, ($27), sqrt !lituse_jsr!2
ldah $29, 0($26) !gpdisp!3
lda $29, 0($29) !gpdisp!3
#else
/* Other builds use the sqrtt instruction directly on the total. */
addt a0, a2, a0
sqrtt a0, a0
#endif
.align 4
/* Common exit; also reached directly when N <= 0. */
$L999:
#if defined(EV4) || defined(EV5)
/* Restore the saved return address and release the 16-byte frame. */
ldq $26, 0($sp)
lda $sp, 16($sp)
#endif
ret
EPILOGUE

530
kernel/alpha/dot.S Normal file
View File

@@ -0,0 +1,530 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Prefetch distance, in elements (doubled in the single-precision loop). */
#define PREFETCHSIZE 88
/* Argument registers. */
#define N $16
#define X $17
#define INCX $18
#define Y $19
#define INCY $20
/* I = loop counter. */
#define I $5
/* s0-s3: four partial sums; s0 carries the result. s3 aliases $f2, which this
   kernel saves on entry and restores at $L999. */
#define s0 $f0
#define s1 $f30
#define s2 $f1
#define s3 $f2
/* a0-a7: elements loaded from X. */
#define a0 $f10
#define a1 $f11
#define a2 $f12
#define a3 $f13
#define a4 $f14
#define a5 $f15
#define a6 $f16
#define a7 $f17
/* b0-b7: elements loaded from Y. */
#define b0 $f18
#define b1 $f19
#define b2 $f20
#define b3 $f21
#define b4 $f22
#define b5 $f23
#define b6 $f24
#define b7 $f25
/* t0-t3: products waiting to be added into s0-s3 on a later step. */
#define t0 $f26
#define t1 $f27
#define t2 $f28
#define t3 $f29
/* Dot-product kernel entry: accumulates the sum of X[i]*Y[i] over N elements. */
PROLOGUE
PROFCODE
.frame $sp, 16, $26, 0
/* Reserve 16 bytes of stack and save $f2 before it is reused as s3. */
lda $sp, -16($sp)
fclr s0
stt $f2, 0($sp)
fclr s1
fclr s2
nop
fclr s3
/* N <= 0: go to the common exit, which returns the cleared sum. */
ble N, $L999
/* Clear the pending products; use the strided path ($L20) unless both
   INCX and INCY equal 1. */
fclr t0
cmpeq INCX, 1, $21
fclr t1
cmpeq INCY, 1, $22
fclr t2
and $21, $22, $22
fclr t3
beq $22, $L20
/* Unit-stride main section. Two variants are selected by DOUBLE: the first
   processes 16 elements per pass, the second 8. Both share one structure:
   preload, steady-state loop $L12, drain $L13, and $L15 which folds pending
   products and computes the leftover count. Loads use negative offsets because
   X and Y are advanced before the data they address has been fetched. */
#ifndef DOUBLE
/* I = N / 16 passes. */
srl N, 4, I
ble I, $L15
/* Preload a0-a7 and b0-b5; b6 and b7 are fetched at the top of the pass. */
LD a0, 0 * SIZE(X)
LD a1, 1 * SIZE(X)
LD b0, 0 * SIZE(Y)
LD b1, 1 * SIZE(Y)
LD a2, 2 * SIZE(X)
LD a3, 3 * SIZE(X)
LD b2, 2 * SIZE(Y)
LD b3, 3 * SIZE(Y)
LD a4, 4 * SIZE(X)
LD a5, 5 * SIZE(X)
LD b4, 4 * SIZE(Y)
LD b5, 5 * SIZE(Y)
LD a6, 6 * SIZE(X)
LD a7, 7 * SIZE(X)
addq X, 16 * SIZE, X
subq I, 1, I
addq Y, 16 * SIZE, Y
ble I, $L13
.align 4
/* Steady state: each ADD folds the previous product into a partial sum, each
   MUL forms a new product, and the paired loads refill the operands. */
$L12:
/* Loads into $31 discard their result; they only touch lines ahead. */
ldl $31, PREFETCHSIZE * 2 * SIZE(X)
subq I, 1, I
ldl $31, PREFETCHSIZE * 2 * SIZE(Y)
addq X, 16 * SIZE, X
ADD s0, t0, s0
LD b6, -10 * SIZE(Y)
MUL a0, b0, t0
LD b7, -9 * SIZE(Y)
ADD s1, t1, s1
LD a0, -24 * SIZE(X)
MUL a1, b1, t1
LD a1, -23 * SIZE(X)
ADD s2, t2, s2
LD b0, -8 * SIZE(Y)
MUL a2, b2, t2
LD b1, -7 * SIZE(Y)
ADD s3, t3, s3
LD a2, -22 * SIZE(X)
MUL a3, b3, t3
LD a3, -21 * SIZE(X)
ADD s0, t0, s0
LD b2, -6 * SIZE(Y)
MUL a4, b4, t0
LD b3, -5 * SIZE(Y)
ADD s1, t1, s1
LD a4, -20 * SIZE(X)
MUL a5, b5, t1
LD a5, -19 * SIZE(X)
ADD s2, t2, s2
LD b4, -4 * SIZE(Y)
MUL a6, b6, t2
LD b5, -3 * SIZE(Y)
ADD s3, t3, s3
LD a6, -18 * SIZE(X)
MUL a7, b7, t3
LD a7, -17 * SIZE(X)
/* Second group of eight within the same pass. */
ADD s0, t0, s0
LD b6, -2 * SIZE(Y)
MUL a0, b0, t0
LD b7, -1 * SIZE(Y)
ADD s1, t1, s1
LD a0, -16 * SIZE(X)
MUL a1, b1, t1
LD a1, -15 * SIZE(X)
ADD s2, t2, s2
LD b0, 0 * SIZE(Y)
MUL a2, b2, t2
LD b1, 1 * SIZE(Y)
ADD s3, t3, s3
LD a2, -14 * SIZE(X)
MUL a3, b3, t3
LD a3, -13 * SIZE(X)
ADD s0, t0, s0
LD b2, 2 * SIZE(Y)
MUL a4, b4, t0
LD b3, 3 * SIZE(Y)
ADD s1, t1, s1
LD a4, -12 * SIZE(X)
MUL a5, b5, t1
LD a5, -11 * SIZE(X)
ADD s2, t2, s2
LD b4, 4 * SIZE(Y)
MUL a6, b6, t2
LD b5, 5 * SIZE(Y)
ADD s3, t3, s3
LD a6, -10 * SIZE(X)
MUL a7, b7, t3
LD a7, -9 * SIZE(X)
addq Y, 16 * SIZE, Y
bgt I, $L12
nop
fnop
.align 4
/* Drain: finish the first eight of the last pass while loading its second
   eight, then multiply those with no further loads. */
$L13:
ADD s0, t0, s0
LD b6,-10 * SIZE(Y)
MUL a0, b0, t0
LD b7, -9 * SIZE(Y)
ADD s1, t1, s1
LD a0, -8 * SIZE(X)
MUL a1, b1, t1
LD a1, -7 * SIZE(X)
ADD s2, t2, s2
LD b0, -8 * SIZE(Y)
MUL a2, b2, t2
LD b1, -7 * SIZE(Y)
ADD s3, t3, s3
LD a2, -6 * SIZE(X)
MUL a3, b3, t3
LD a3, -5 * SIZE(X)
ADD s0, t0, s0
LD b2, -6 * SIZE(Y)
MUL a4, b4, t0
LD b3, -5 * SIZE(Y)
ADD s1, t1, s1
LD a4, -4 * SIZE(X)
MUL a5, b5, t1
LD a5, -3 * SIZE(X)
ADD s2, t2, s2
LD b4, -4 * SIZE(Y)
MUL a6, b6, t2
LD b5, -3 * SIZE(Y)
ADD s3, t3, s3
LD a6, -2 * SIZE(X)
MUL a7, b7, t3
LD a7, -1 * SIZE(X)
ADD s0, t0, s0
LD b6, -2 * SIZE(Y)
MUL a0, b0, t0
LD b7, -1 * SIZE(Y)
ADD s1, t1, s1
MUL a1, b1, t1
ADD s2, t2, s2
MUL a2, b2, t2
ADD s3, t3, s3
MUL a3, b3, t3
ADD s0, t0, s0
MUL a4, b4, t0
ADD s1, t1, s1
MUL a5, b5, t1
ADD s2, t2, s2
MUL a6, b6, t2
ADD s3, t3, s3
MUL a7, b7, t3
.align 4
/* Fold t0/t1 (t2/t3 are folded at $L16/$L18) and set I = N mod 16. */
$L15:
ADD s0, t0, s0
and N, 15, I
ADD s1, t1, s1
ble I, $L18
.align 4
#else
/* DOUBLE variant: I = N / 8 passes. */
srl N, 3, I
ble I, $L15
/* Preload a0-a7 and b0-b5; b6 and b7 are fetched at the top of the pass. */
LD a0, 0 * SIZE(X)
LD a1, 1 * SIZE(X)
LD b0, 0 * SIZE(Y)
LD b1, 1 * SIZE(Y)
LD a2, 2 * SIZE(X)
LD a3, 3 * SIZE(X)
LD b2, 2 * SIZE(Y)
LD b3, 3 * SIZE(Y)
LD a4, 4 * SIZE(X)
LD a5, 5 * SIZE(X)
LD b4, 4 * SIZE(Y)
LD b5, 5 * SIZE(Y)
LD a6, 6 * SIZE(X)
LD a7, 7 * SIZE(X)
addq X, 8 * SIZE, X
subq I, 1, I
addq Y, 8 * SIZE, Y
ble I, $L13
.align 4
/* Steady state, eight elements per pass. */
$L12:
/* Loads into $31 discard their result; they only touch lines ahead. */
ldl $31, PREFETCHSIZE * SIZE(X)
subq I, 1, I
ldl $31, PREFETCHSIZE * SIZE(Y)
addq X, 8 * SIZE, X
ADD s0, t0, s0
LD b6, -2 * SIZE(Y)
MUL a0, b0, t0
LD b7, -1 * SIZE(Y)
ADD s1, t1, s1
LD a0, -8 * SIZE(X)
MUL a1, b1, t1
LD a1, -7 * SIZE(X)
ADD s2, t2, s2
LD b0, 0 * SIZE(Y)
MUL a2, b2, t2
LD b1, 1 * SIZE(Y)
ADD s3, t3, s3
LD a2, -6 * SIZE(X)
MUL a3, b3, t3
LD a3, -5 * SIZE(X)
ADD s0, t0, s0
LD b2, 2 * SIZE(Y)
MUL a4, b4, t0
LD b3, 3 * SIZE(Y)
ADD s1, t1, s1
LD a4, -4 * SIZE(X)
MUL a5, b5, t1
LD a5, -3 * SIZE(X)
ADD s2, t2, s2
LD b4, 4 * SIZE(Y)
MUL a6, b6, t2
LD b5, 5 * SIZE(Y)
ADD s3, t3, s3
LD a6, -2 * SIZE(X)
MUL a7, b7, t3
LD a7, -1 * SIZE(X)
addq Y, 8 * SIZE, Y
bgt I, $L12
nop
fnop
.align 4
/* Drain: load the last b6/b7 and multiply the eight pairs in flight. */
$L13:
ADD s0, t0, s0
LD b6, -2 * SIZE(Y)
MUL a0, b0, t0
LD b7, -1 * SIZE(Y)
ADD s1, t1, s1
MUL a1, b1, t1
ADD s2, t2, s2
MUL a2, b2, t2
ADD s3, t3, s3
MUL a3, b3, t3
ADD s0, t0, s0
MUL a4, b4, t0
ADD s1, t1, s1
MUL a5, b5, t1
ADD s2, t2, s2
MUL a6, b6, t2
ADD s3, t3, s3
MUL a7, b7, t3
.align 4
/* Fold t0/t1 (t2/t3 are folded at $L16/$L18) and set I = N mod 8. */
$L15:
ADD s0, t0, s0
and N, 7, I
ADD s1, t1, s1
ble I, $L18
.align 4
#endif
/* Unit-stride tail: one element per iteration, accumulating through s2/t2. */
$L16:
LD a0, 0 * SIZE(X)
addq X, SIZE, X
LD b0, 0 * SIZE(Y)
addq Y, SIZE, Y
/* Fold the previous product, then form the new one. */
ADD s2, t2, s2
MUL a0, b0, t2
subq I, 1, I
bgt I, $L16
.align 4
/* Fold the last pending products and jump to the common exit. */
$L18:
ADD s2, t2, s2
ADD s3, t3, s3
br $L999
.align 4
/* Strided path setup: I = N / 4 passes of four elements each. SXADDQ
   (common.h macro) steps each pointer by its stride after every load. */
$L20:
srl N, 2, I
ble I, $L25
/* Preload four X/Y pairs. */
LD a0, 0 * SIZE(X)
SXADDQ INCX, X, X
LD b0, 0 * SIZE(Y)
SXADDQ INCY, Y, Y
LD a1, 0 * SIZE(X)
SXADDQ INCX, X, X
LD b1, 0 * SIZE(Y)
SXADDQ INCY, Y, Y
LD a2, 0 * SIZE(X)
SXADDQ INCX, X, X
LD b2, 0 * SIZE(Y)
SXADDQ INCY, Y, Y
LD a3, 0 * SIZE(X)
SXADDQ INCX, X, X
LD b3, 0 * SIZE(Y)
/* One pass is now in flight; skip the steady-state loop if it is the only one. */
subq I, 1, I
SXADDQ INCY, Y, Y
ble I, $L23
.align 4
/* Strided steady state: fold the previous four products, multiply the four
   pairs in flight, then load the next four pairs. */
$L22:
ADD s0, t0, s0
MUL a0, b0, t0
ADD s1, t1, s1
MUL a1, b1, t1
ADD s2, t2, s2
MUL a2, b2, t2
ADD s3, t3, s3
MUL a3, b3, t3
LD a0, 0 * SIZE(X)
SXADDQ INCX, X, X
LD b0, 0 * SIZE(Y)
SXADDQ INCY, Y, Y
LD a1, 0 * SIZE(X)
SXADDQ INCX, X, X
LD b1, 0 * SIZE(Y)
SXADDQ INCY, Y, Y
LD a2, 0 * SIZE(X)
SXADDQ INCX, X, X
LD b2, 0 * SIZE(Y)
SXADDQ INCY, Y, Y
LD a3, 0 * SIZE(X)
SXADDQ INCX, X, X
LD b3, 0 * SIZE(Y)
SXADDQ INCY, Y, Y
subq I, 1, I
bgt I, $L22
nop
fnop
.align 4
/* Drain: multiply the last four pairs; their products stay pending in t0-t3. */
$L23:
ADD s0, t0, s0
MUL a0, b0, t0
ADD s1, t1, s1
MUL a1, b1, t1
ADD s2, t2, s2
MUL a2, b2, t2
ADD s3, t3, s3
MUL a3, b3, t3
.align 4
/* Strided tail: fold t0/t1, then handle the N mod 4 leftover elements one at a
   time through s2/t2. */
$L25:
ADD s0, t0, s0
and N, 3, I
ADD s1, t1, s1
ble I, $L28
.align 4
$L26:
LD a0, 0 * SIZE(X)
SXADDQ INCX, X, X
LD b0, 0 * SIZE(Y)
SXADDQ INCY, Y, Y
/* Fold the previous product, then form the new one. */
ADD s2, t2, s2
MUL a0, b0, t2
subq I, 1, I
bgt I, $L26
.align 4
/* Fold the last pending products before the final reduction. */
$L28:
ADD s2, t2, s2
ADD s3, t3, s3
.align 4
/* Common exit: combine the four partial sums into s0, restore the saved $f2
   and release the 16-byte stack frame. Also reached directly when N <= 0. */
$L999:
ADD s2, s3, s2
ldt $f2, 0($sp)
ADD s0, s1, s0
lda $sp, 16($sp)
ADD s0, s2, s0
ret
EPILOGUE

179
kernel/alpha/gemm_beta.S Normal file
View File

@@ -0,0 +1,179 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Beta-scaling kernel entry: multiplies every entry of the m-by-n matrix C by
   beta, or stores zeros when beta == 0.
   Registers as used below: $16 = m, $17 = n, $f19 = beta,
   $18 = C pointer and $19 = ldc, both loaded from the stack. */
.set noat
.set noreorder
.text
.align 5
.globl CNAME
.ent CNAME
CNAME:
.frame $sp, 0, $26, 0
#ifdef PROFILE
ldgp $gp, 0($27)
lda $28, _mcount
jsr $28, ($28), _mcount
#endif
/* Load C and ldc from the stack; return at once when m <= 0 or n <= 0. */
ldq $18, 16($sp)
ble $16, $End
ldl $19, 24($sp)
ble $17, $End
#ifndef PROFILE
.prologue 0
#else
.prologue 1
#endif
/* beta == 0 takes the store-zeros path, which never reads C. */
fbeq $f19, $BETA_EQ_ZERO # if (beta == ZERO)
.align 4
/* beta != 0: scale one column per outer iteration. $1 walks the current
   column, $2 counts groups of eight rows, $17 counts remaining columns. */
$BETA_NE_ZERO:
sra $16, 3, $2 # i = (m >> 3)
mov $18, $1 # c_offset = c
lda $17, -1($17) # j --
ble $2,$L52
.align 4
/* Eight rows per pass: load, multiply by beta, store back in place. */
$L51:
/* A load into $f31 discards its result; it only touches memory 64 bytes ahead. */
lds $f31, 64($1)
lda $2, -1($2)
LD $f14, 0*SIZE($1)
LD $f15, 1*SIZE($1)
LD $f16, 2*SIZE($1)
LD $f17, 3*SIZE($1)
LD $f18, 4*SIZE($1)
LD $f11, 5*SIZE($1)
LD $f21, 6*SIZE($1)
LD $f22, 7*SIZE($1)
MUL $f19, $f14, $f23
MUL $f19, $f15, $f24
MUL $f19, $f16, $f25
MUL $f19, $f17, $f26
MUL $f19, $f18, $f27
MUL $f19, $f11, $f28
MUL $f19, $f21, $f29
MUL $f19, $f22, $f30
ST $f23, 0*SIZE($1)
ST $f24, 1*SIZE($1)
ST $f25, 2*SIZE($1)
ST $f26, 3*SIZE($1)
ST $f27, 4*SIZE($1)
ST $f28, 5*SIZE($1)
ST $f29, 6*SIZE($1)
ST $f30, 7*SIZE($1)
lda $1,8*SIZE($1)
bgt $2,$L51
.align 4
/* Leftover rows of the column (m mod 8), one at a time. */
$L52:
and $16, 7, $2
ble $2,$L54
.align 4
$L53:
LD $f12, 0($1)
lda $2, -1($2)
MUL $f19, $f12, $f23
ST $f23, 0($1)
lda $1, SIZE($1)
bgt $2,$L53
.align 4
/* Step to the next column; once all columns are done, return 0. */
$L54:
SXADDQ $19, $18, $18 # c += ldc
bgt $17,$BETA_NE_ZERO
clr $0
ret
.align 4
/* beta == 0: store zeros ($f31) over one column per outer iteration.
   $1 walks the current column, $2 counts groups of eight rows, $17 counts
   remaining columns. $4 is advanced alongside $1 but never read in this
   routine. */
$BETA_EQ_ZERO:
sra $16, 3, $2 # i = (m >> 3)
lda $4, 8*SIZE($18)
mov $18, $1 # c_offset = c
lda $17, -1($17) # j --
ble $2,$L42
.align 4
/* Eight rows per pass. */
$L41:
ST $f31, 0*SIZE($1)
ST $f31, 1*SIZE($1)
ST $f31, 2*SIZE($1)
ST $f31, 3*SIZE($1)
ST $f31, 4*SIZE($1)
ST $f31, 5*SIZE($1)
ST $f31, 6*SIZE($1)
ST $f31, 7*SIZE($1)
lda $2, -1($2)
lda $4, 8*SIZE($4)
lda $1, 8*SIZE($1)
bgt $2,$L41
.align 4
/* Leftover rows of the column (m mod 8), one at a time. */
$L42:
and $16, 7, $2
ble $2,$L44
.align 4
$L43:
lda $2, -1($2)
ST $f31, 0($1)
lda $1, SIZE($1)
bgt $2, $L43
.align 4
/* End of one zero-filled column: step C to the next column and loop while
   columns remain ($17 > 0). */
$L44:
SXADDQ $19, $18, $18 # c += ldc
bgt $17,$BETA_EQ_ZERO
.align 4
/* Common exit. The return value is cleared here rather than before the label,
   so the early exits for m <= 0 or n <= 0, which branch straight to $End, also
   return 0 instead of whatever $0 happened to hold. */
$End:
clr $0
ret
.ident VERSION
.end CNAME

File diff suppressed because it is too large Load Diff

1307
kernel/alpha/gemv_n.S Normal file

File diff suppressed because it is too large Load Diff

1061
kernel/alpha/gemv_t.S Normal file

File diff suppressed because it is too large Load Diff

440
kernel/alpha/iamax.S Normal file
View File

@@ -0,0 +1,440 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Argument registers; XX keeps the original X for the second (index) pass. */
#define N $16
#define X $17
#define INCX $18
#define XX $19
/* CMPLT(a, b) tests a < b by default, so the first pass tracks a maximum;
   with USE_MIN the operands are swapped and it tracks a minimum. */
#ifndef USE_MIN
#define CMPLT(a, b) cmptlt a, b
#else
#define CMPLT(a, b) cmptlt b, a
#endif
/* Stack bytes reserved; the first 40 hold the saved $f2-$f6. */
#define STACKSIZE 6 * 8
/* Index-of-extreme kernel entry. Pass one finds the extreme absolute value into
   $f0; pass two (from $L20) rescans from XX and counts in $0 up to the first
   element whose absolute value equals $f0. */
PROLOGUE
PROFCODE
.frame $sp, STACKSIZE, $26, 0
#ifdef F_INTERFACE
/* F_INTERFACE: n and incx arrive by reference. */
ldl N, 0(N) # n
ldl INCX, 0(INCX) # incx
#endif
lda $sp, -STACKSIZE($sp)
mov X, XX
.align 4
/* Interleaved: save $f2-$f6, clear the compare flags $f16-$f19, test
   N > 0 and INCX > 0, rescale INCX with SXADDQ, and clear the result $0. */
stt $f2, 0($sp)
fclr $f16
cmplt $31, N, $2
unop
stt $f3, 8($sp)
fclr $f17
cmplt $31, INCX, $3
unop
stt $f4, 16($sp)
fclr $f18
SXADDQ INCX, $31, INCX
unop
stt $f5, 24($sp)
fclr $f19
and $2, $3, $2
clr $0
stt $f6, 32($sp)
fclr $f0
/* $1 = N / 8 unrolled passes for the first pass. */
sra N, 3, $1
beq $2, $End # if (n <= 0) or (incx <= 0) return
.align 4
/* Seed the running extreme with |x[0]|. With fewer than 8 elements go to the
   scalar loop; X has not moved, so that loop re-reads x[0]. */
LD $f20, 0 * SIZE(X)
unop
fabs $f20, $f0
ble $1, $L15
.align 4
/* Seed the other seven running extremes ($f1-$f6, $f28) with |x[0]| and
   preload the next seven elements into $f21-$f27. */
fabs $f20, $f1
unop
addq X, INCX, X
unop
LD $f21, 0 * SIZE(X)
fabs $f20, $f2
addq X, INCX, X
unop
LD $f22, 0 * SIZE(X)
fabs $f20, $f3
addq X, INCX, X
unop
LD $f23, 0 * SIZE(X)
fabs $f20, $f4
addq X, INCX, X
unop
LD $f24, 0 * SIZE(X)
addq X, INCX, X
fabs $f20, $f5
unop
LD $f25, 0 * SIZE(X)
fabs $f20, $f6
addq X, INCX, X
unop
LD $f26, 0 * SIZE(X)
fabs $f20, $f28
addq X, INCX, X
/* One pass is now in flight; skip the steady-state loop if it is the only one. */
lda $1, -1($1)
LD $f27, 0 * SIZE(X)
unop
addq X, INCX, X
ble $1, $L13
.align 4
/* First-pass steady state, eight elements per pass. Eight running extremes
   ($f0-$f6, $f28) are updated in parallel: fabs forms the candidate, CMPLT
   sets a flag in $f16-$f19 and fcmovne applies the update when the flag is
   non-zero. The leading four fcmovne finish the compares issued at the end of
   the previous pass; on the first pass the flags were cleared at entry. */
$L12:
fcmovne $f16, $f12, $f4
unop
fabs $f20, $f29
/* A load into $31 discards its result; it only touches memory ahead of X. */
ldl $31, 56 * SIZE(X)
fcmovne $f17, $f13, $f5
LD $f20, 0 * SIZE(X)
fabs $f21, $f30
addq X, INCX, X
fcmovne $f18, $f14, $f6
LD $f21, 0 * SIZE(X)
fabs $f22, $f10
addq X, INCX, X
fcmovne $f19, $f15, $f28
LD $f22, 0 * SIZE(X)
fabs $f23, $f11
addq X, INCX, X
fabs $f24, $f12
LD $f23, 0 * SIZE(X)
CMPLT($f0, $f29), $f16
addq X, INCX, X
fabs $f25, $f13
LD $f24, 0 * SIZE(X)
CMPLT($f1, $f30), $f17
addq X, INCX, X
fabs $f26, $f14
LD $f25, 0 * SIZE(X)
CMPLT($f2, $f10), $f18
addq X, INCX, X
fabs $f27, $f15
LD $f26, 0 * SIZE(X)
CMPLT($f3, $f11), $f19
addq X, INCX, X
fcmovne $f16, $f29, $f0
LD $f27, 0 * SIZE(X)
CMPLT($f4, $f12), $f16
addq X, INCX, X
fcmovne $f17, $f30, $f1
unop
CMPLT($f5, $f13), $f17
lda $1, -1($1) # i --
fcmovne $f18, $f10, $f2
unop
CMPLT($f6, $f14), $f18
unop
fcmovne $f19, $f11, $f3
unop
CMPLT($f28, $f15), $f19
bgt $1,$L12
.align 4
/* First-pass drain: apply the pending updates, fold in the last eight loaded
   elements (no further loads), then reduce the eight running extremes pairwise
   into $f0. */
$L13:
fcmovne $f16, $f12, $f4
fabs $f20, $f29
fcmovne $f17, $f13, $f5
fabs $f21, $f30
fcmovne $f18, $f14, $f6
fabs $f22, $f10
fcmovne $f19, $f15, $f28
fabs $f23, $f11
fabs $f24, $f12
CMPLT($f0, $f29), $f16
fabs $f25, $f13
CMPLT($f1, $f30), $f17
fabs $f26, $f14
CMPLT($f2, $f10), $f18
fabs $f27, $f15
CMPLT($f3, $f11), $f19
fcmovne $f16, $f29, $f0
CMPLT($f4, $f12), $f16
fcmovne $f17, $f30, $f1
CMPLT($f5, $f13), $f17
fcmovne $f18, $f10, $f2
CMPLT($f6, $f14), $f18
fcmovne $f19, $f11, $f3
CMPLT($f28, $f15), $f19
/* Reduction 8 -> 4: pairs ($f0,$f1), ($f2,$f3), ($f4,$f5), ($f6,$f28). */
fcmovne $f16, $f12, $f4
CMPLT($f0, $f1), $f16
fcmovne $f17, $f13, $f5
CMPLT($f2, $f3), $f17
fcmovne $f18, $f14, $f6
CMPLT($f4, $f5), $f18
fcmovne $f19, $f15, $f28
CMPLT($f6, $f28), $f19
fcmovne $f16, $f1, $f0
fcmovne $f17, $f3, $f2
fcmovne $f18, $f5, $f4
fcmovne $f19, $f28, $f6
/* Reduction 4 -> 2 -> 1; the overall extreme ends up in $f0. */
CMPLT($f0, $f2), $f16
CMPLT($f4, $f6), $f17
fcmovne $f16, $f2, $f0
fcmovne $f17, $f6, $f4
CMPLT($f0, $f4), $f16
fcmovne $f16, $f4, $f0
.align 4
/* First-pass tail: fold the N mod 8 leftover elements into $f0 one at a time. */
$L15:
and N, 7, $1
unop
unop
ble $1, $L20
.align 4
$L16:
LD $f20, 0 * SIZE(X)
addq X, INCX, X
/* $f0 = |x| whenever CMPLT reports that the candidate beats the current extreme. */
fabs $f20, $f29
CMPLT($f0, $f29), $f16
fcmovne $f16, $f29, $f0
lda $1, -1($1) # i --
bgt $1, $L16
.align 4
/* Second pass setup: rescan from the original pointer XX to find where the
   extreme value $f0 first occurs. $1 = N / 8 unrolled passes. */
$L20:
sra N, 3, $1
ble $1, $L40
.align 4
/* Preload eight elements into $f10-$f17. */
LD $f10, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f11, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f12, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f13, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f14, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f15, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f16, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f17, 0 * SIZE(XX)
addq XX, INCX, XX
/* Absolute values of the first four; the other four are formed inside the loop. */
fabs $f10, $f18
fabs $f11, $f19
fabs $f12, $f20
fabs $f13, $f21
lda $1, -1($1)
ble $1, $L23
.align 4
/* Second-pass steady state: compare eight absolute values ($f18-$f25) with the
   extreme in $f0 while loading the next eight elements. $0 is incremented
   before each test, so on a match it holds that element's 1-based position. */
$L22:
LD $f10, 0 * SIZE(XX)
fabs $f14, $f22
addq XX, INCX, XX
cmpteq $f0, $f18, $f2
LD $f11, 0 * SIZE(XX)
fabs $f15, $f23
addq XX, INCX, XX
cmpteq $f0, $f19, $f3
LD $f12, 0 * SIZE(XX)
fabs $f16, $f24
addq XX, INCX, XX
cmpteq $f0, $f20, $f4
LD $f13, 0 * SIZE(XX)
fabs $f17, $f25
addq XX, INCX, XX
cmpteq $f0, $f21, $f5
LD $f14, 0 * SIZE(XX)
lda $1, -1($1) # i --
cmpteq $f0, $f22, $f26
addq XX, INCX, XX
/* Eight count-then-test steps, one per element, in element order. */
lda $0, 1($0)
fbne $f2, $End
LD $f15, 0 * SIZE(XX)
cmpteq $f0, $f23, $f27
lda $0, 1($0)
fbne $f3, $End
addq XX, INCX, XX
cmpteq $f0, $f24, $f28
lda $0, 1($0)
fbne $f4, $End
LD $f16, 0 * SIZE(XX)
cmpteq $f0, $f25, $f29
lda $0, 1($0)
fbne $f5, $End
addq XX, INCX, XX
lda $0, 1($0)
/* Absolute values for the next pass are formed between the remaining tests. */
fabs $f10, $f18
fbne $f26, $End
LD $f17, 0 * SIZE(XX)
lda $0, 1($0)
fabs $f11, $f19
fbne $f27, $End
addq XX, INCX, XX
lda $0, 1($0)
fabs $f12, $f20
fbne $f28, $End
lda $0, 1($0)
fabs $f13, $f21
fbne $f29, $End
bgt $1, $L22
.align 4
/* Second-pass drain: test the last eight preloaded elements against $f0 with
   no further loads, incrementing $0 before each test. */
$L23:
fabs $f14, $f22
cmpteq $f0, $f18, $f2
fabs $f15, $f23
cmpteq $f0, $f19, $f3
fabs $f16, $f24
cmpteq $f0, $f20, $f4
fabs $f17, $f25
cmpteq $f0, $f21, $f5
cmpteq $f0, $f22, $f26
lda $0, 1($0)
unop
fbne $f2, $End
cmpteq $f0, $f23, $f27
lda $0, 1($0)
unop
fbne $f3, $End
cmpteq $f0, $f24, $f28
lda $0, 1($0)
unop
fbne $f4, $End
cmpteq $f0, $f25, $f29
lda $0, 1($0)
unop
fbne $f5, $End
lda $0, 1($0)
fbne $f26, $End
lda $0, 1($0)
fbne $f27, $End
lda $0, 1($0)
fbne $f28, $End
lda $0, 1($0)
fbne $f29, $End
.align 4
/* Second-pass scalar scan: test one element per iteration. The loop has no
   count bound (the back branch is unconditional); it exits only when an
   element's absolute value equals $f0. */
$L40:
LD $f20, 0 * SIZE(XX)
addq XX, INCX, XX
fabs $f20, $f25
cmpteq $f0, $f25, $f29
lda $0, 1($0)
fbne $f29, $End
br $31, $L40
.align 4
/* Common exit: restore the saved $f2-$f6, release the stack frame and return
   with the result in $0 (0 when N <= 0 or INCX <= 0). */
$End:
ldt $f2, 0($sp)
ldt $f3, 8($sp)
ldt $f4, 16($sp)
ldt $f5, 24($sp)
ldt $f6, 32($sp)
lda $sp, STACKSIZE($sp)
ret
EPILOGUE

351
kernel/alpha/imax.S Normal file
View File

@@ -0,0 +1,351 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/*
 * Index-of-extreme-element kernel (Alpha).
 *
 * Pass 1 walks the vector and keeps a running extreme value in $f0:
 * the largest element by default, the smallest when USE_MIN is defined.
 * Pass 2 re-walks the vector from its saved start (XX), bumping $0 once
 * per element, and leaves as soon as an element compares equal to $f0,
 * so $0 is the 1-based position of the first matching element.
 * $0 stays 0 when n <= 0 or incx <= 0.
 *
 * NOTE(review): the scalar tail of pass 2 ($L40) has no element-count
 * bound; it relies on some element comparing equal to $f0.  Data for
 * which cmpteq never succeeds (presumably NaN input) looks like it would
 * run past the end of the vector -- confirm.
 */
/* Integer argument registers: element count, walking pointer, stride. */
#define N $16
#define X $17
#define INCX $18
/* Copy of the original X, consumed by pass 2. */
#define XX $19
/*
 * CMPLT(cur, new) is non-zero when "new" should replace "cur":
 * cur < new normally, new < cur when USE_MIN is defined.
 */
#ifndef USE_MIN
#define CMPLT(a, b) cmptlt a, b
#else
#define CMPLT(a, b) cmptlt b, a
#endif
/* Not referenced anywhere in this routine; no stack frame is set up. */
#define STACKSIZE 8 * 8
PROLOGUE
PROFCODE
/* $0 = result index, starts at 0; remember the vector start for pass 2. */
clr $0
mov X, XX
.align 4
/* $2 = (n > 0) & (incx > 0); $1 = n / 8 = number of unrolled groups. */
cmplt $31, N, $2
cmplt $31, INCX, $3
/* SXADDQ comes from common.h (not visible here); presumably it scales
   INCX from elements to bytes -- confirm. */
SXADDQ INCX, $31, INCX
and $2, $3, $2
sra N, 3, $1
fclr $f0
unop
beq $2, $End # if (n <= 0) or (incx <= 0) return
.align 4
/* Seed the running extreme with the first element. */
LD $f0, 0 * SIZE(X)
unop
unop
ble $1, $L15
.align 4
/*
 * Pass 1 setup: eight independent accumulators ($f0, $f1, $f10..$f15),
 * all seeded with element 0.  $f20 is a copy of element 0 and
 * $f21..$f27 receive the next seven elements, forming the first group.
 */
fmov $f0, $f1
addq X, INCX, X
fmov $f0, $f10
lda $1, -1($1)
LD $f21, 0 * SIZE(X)
fmov $f0, $f11
addq X, INCX, X
fmov $f0, $f12
LD $f22, 0 * SIZE(X)
fmov $f0, $f13
addq X, INCX, X
fmov $f0, $f14
LD $f23, 0 * SIZE(X)
fmov $f0, $f15
addq X, INCX, X
fmov $f0, $f20
LD $f24, 0 * SIZE(X)
addq X, INCX, X
LD $f25, 0 * SIZE(X)
addq X, INCX, X
LD $f26, 0 * SIZE(X)
addq X, INCX, X
LD $f27, 0 * SIZE(X)
addq X, INCX, X
/* Start the compare pipeline for the first four lanes. */
CMPLT($f0, $f20), $f16
CMPLT($f1, $f21), $f17
CMPLT($f10, $f22), $f18
CMPLT($f11, $f23), $f19
ble $1, $L13
.align 4
/*
 * Pass 1 main loop.  Each fcmovne commits the compare issued earlier,
 * then the same data register is reloaded with an element of the next
 * group, so loads, compares and conditional moves overlap.
 */
$L12:
fcmovne $f16, $f20, $f0
LD $f20, 0 * SIZE(X)
CMPLT($f12, $f24), $f16
addq X, INCX, X
fcmovne $f17, $f21, $f1
LD $f21, 0 * SIZE(X)
CMPLT($f13, $f25), $f17
addq X, INCX, X
fcmovne $f18, $f22, $f10
LD $f22, 0 * SIZE(X)
CMPLT($f14, $f26), $f18
addq X, INCX, X
fcmovne $f19, $f23, $f11
LD $f23, 0 * SIZE(X)
CMPLT($f15, $f27), $f19
addq X, INCX, X
fcmovne $f16, $f24, $f12
LD $f24, 0 * SIZE(X)
CMPLT($f0, $f20), $f16
addq X, INCX, X
fcmovne $f17, $f25, $f13
LD $f25, 0 * SIZE(X)
CMPLT($f1, $f21), $f17
addq X, INCX, X
fcmovne $f18, $f26, $f14
LD $f26, 0 * SIZE(X)
CMPLT($f10, $f22), $f18
addq X, INCX, X
fcmovne $f19, $f27, $f15
LD $f27, 0 * SIZE(X)
CMPLT($f11, $f23), $f19
lda $1, -1($1) # i --
addq X, INCX, X
unop
unop
bgt $1,$L12
.align 4
/*
 * Drain: commit the last loaded group (no further loads), then fold the
 * eight accumulators pairwise 8 -> 4 -> 2 -> 1 into $f0.
 */
$L13:
fcmovne $f16, $f20, $f0
CMPLT($f12, $f24), $f16
fcmovne $f17, $f21, $f1
CMPLT($f13, $f25), $f17
fcmovne $f18, $f22, $f10
CMPLT($f14, $f26), $f18
fcmovne $f19, $f23, $f11
CMPLT($f15, $f27), $f19
fcmovne $f16, $f24, $f12
CMPLT($f0, $f1), $f16
fcmovne $f17, $f25, $f13
CMPLT($f10, $f11), $f17
fcmovne $f18, $f26, $f14
CMPLT($f12, $f13), $f18
fcmovne $f19, $f27, $f15
CMPLT($f14, $f15), $f19
fcmovne $f16, $f1, $f0
fcmovne $f17, $f11, $f10
fcmovne $f18, $f13, $f12
fcmovne $f19, $f15, $f14
CMPLT($f0, $f10), $f16
CMPLT($f12, $f14), $f17
fcmovne $f16, $f10, $f0
fcmovne $f17, $f14, $f12
CMPLT($f0, $f12), $f16
fcmovne $f16, $f12, $f0
.align 4
/*
 * Pass 1 remainder: n & 7 elements, one at a time.  When n < 8 the
 * unrolled part was skipped and X still points at element 0, so that
 * element is simply compared against itself once more.
 */
$L15:
and N, 7, $1
unop
unop
ble $1, $L20
.align 4
$L16:
LD $f20, 0 * SIZE(X)
addq X, INCX, X
CMPLT($f0, $f20), $f16
fcmovne $f16, $f20, $f0
lda $1, -1($1) # i --
bgt $1, $L16
.align 4
/*
 * Pass 2: find the first element equal to $f0, starting again from XX.
 * $1 = n / 8 groups are scanned unrolled; anything after that is left
 * to the one-at-a-time scan at $L40.
 */
$L20:
sra N, 3, $1
ble $1, $L40
.align 4
LD $f10, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f11, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f12, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f13, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f14, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f15, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f16, 0 * SIZE(XX)
addq XX, INCX, XX
LD $f17, 0 * SIZE(XX)
addq XX, INCX, XX
cmpteq $f0, $f10, $f20
cmpteq $f0, $f11, $f21
cmpteq $f0, $f12, $f22
cmpteq $f0, $f13, $f23
lda $1, -1($1)
ble $1, $L23
.align 4
/*
 * Unrolled search loop.  For each element: bump the index in $0, then
 * leave through $End if the equality flag computed earlier is set.
 * The data registers are reloaded with the next group as they free up.
 */
$L22:
LD $f10, 0 * SIZE(XX)
cmpteq $f0, $f14, $f24
lda $0, 1($0)
addq XX, INCX, XX
fbne $f20, $End
LD $f11, 0 * SIZE(XX)
cmpteq $f0, $f15, $f25
lda $0, 1($0)
addq XX, INCX, XX
fbne $f21, $End
LD $f12, 0 * SIZE(XX)
cmpteq $f0, $f16, $f26
lda $0, 1($0)
addq XX, INCX, XX
fbne $f22, $End
LD $f13, 0 * SIZE(XX)
cmpteq $f0, $f17, $f27
lda $0, 1($0)
addq XX, INCX, XX
fbne $f23, $End
LD $f14, 0 * SIZE(XX)
cmpteq $f0, $f10, $f20
lda $0, 1($0)
addq XX, INCX, XX
fbne $f24, $End
LD $f15, 0 * SIZE(XX)
cmpteq $f0, $f11, $f21
lda $0, 1($0)
addq XX, INCX, XX
fbne $f25, $End
LD $f16, 0 * SIZE(XX)
lda $1, -1($1) # i --
cmpteq $f0, $f12, $f22
lda $0, 1($0)
addq XX, INCX, XX
fbne $f26, $End
LD $f17, 0 * SIZE(XX)
cmpteq $f0, $f13, $f23
lda $0, 1($0)
addq XX, INCX, XX
fbne $f27, $End
bgt $1, $L22
.align 4
/* Test the last loaded group of eight without loading any further. */
$L23:
lda $0, 1($0)
cmpteq $f0, $f14, $f24
unop
fbne $f20, $End
lda $0, 1($0)
cmpteq $f0, $f15, $f25
unop
fbne $f21, $End
lda $0, 1($0)
cmpteq $f0, $f16, $f26
unop
fbne $f22, $End
lda $0, 1($0)
cmpteq $f0, $f17, $f27
unop
fbne $f23, $End
lda $0, 1($0)
fbne $f24, $End
lda $0, 1($0)
fbne $f25, $End
lda $0, 1($0)
fbne $f26, $End
lda $0, 1($0)
fbne $f27, $End
.align 4
/*
 * One-at-a-time search.  The only exit is a successful compare; there is
 * no counter here (see the NOTE(review) at the top of the routine).
 */
$L40:
LD $f20, 0 * SIZE(XX)
addq XX, INCX, XX
cmpteq $f0, $f20, $f29
lda $0, 1($0)
fbne $f29, $End
br $31, $L40
.align 4
/* Result index is in $0. */
$End:
ret
EPILOGUE

427
kernel/alpha/izamax.S Normal file
View File

@@ -0,0 +1,427 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/*
 * Index-of-extreme-element kernel for complex vectors (Alpha).
 *
 * Each element is two consecutive values; its magnitude is taken as
 * |first| + |second| (fabs + addt).  Pass 1 keeps the running extreme
 * magnitude in $f0 (largest by default, smallest with USE_MIN).  Pass 2
 * recomputes the magnitudes from the saved start pointer XX and bumps $0
 * per element until one compares equal to $f0, so $0 is the 1-based
 * position of the first match.  $0 stays 0 when n <= 0 or incx <= 0.
 *
 * $f2..$f9 are spilled to a STACKSIZE-byte frame on entry and reloaded
 * at $End.
 *
 * NOTE(review): the scalar tail of pass 2 ($L40) has no element-count
 * bound; it relies on some magnitude comparing equal to $f0.  Data for
 * which cmpteq never succeeds (presumably NaN input) looks like it would
 * run past the end of the vector -- confirm.
 */
/* Integer argument registers: element count, walking pointer, stride. */
#define N $16
#define X $17
#define INCX $18
/* Copy of the original X, consumed by pass 2. */
#define XX $19
/*
 * CMPLT(cur, new) is non-zero when "new" should replace "cur":
 * cur < new normally, new < cur when USE_MIN is defined.
 */
#ifndef USE_MIN
#define CMPLT(a, b) cmptlt a, b
#else
#define CMPLT(a, b) cmptlt b, a
#endif
/* Room for the eight saved registers $f2..$f9, 8 bytes each. */
#define STACKSIZE 8 * 8
PROLOGUE
PROFCODE
/*
 * Entry: open the frame and save $f2..$f9, interleaved with the argument
 * checks.  All eight saves are done before the early-exit branch, so the
 * reloads at $End are always matched.
 */
lda $sp, -STACKSIZE($sp)
stt $f2, 0($sp)
fclr $f16
cmplt $31, N, $2
unop
stt $f3, 8($sp)
fclr $f17
cmplt $31, INCX, $3
unop
stt $f4, 16($sp)
fclr $f18
/* SXADDQ comes from common.h (not visible here); presumably it scales
   INCX from elements to bytes -- confirm. */
SXADDQ INCX, $31, INCX
unop
stt $f5, 24($sp)
fclr $f19
and $2, $3, $2
clr $0
stt $f6, 32($sp)
mov X, XX
stt $f7, 40($sp)
stt $f8, 48($sp)
stt $f9, 56($sp)
fclr $f0
beq $2, $End # if (n <= 0) or (incx <= 0) return
.align 4
/*
 * Seed $f0 with the magnitude of element 0.  $1 = n / 4 unrolled groups.
 * INCX is doubled because each element occupies two values.
 */
LD $f20, 0 * SIZE(X)
LD $f21, 1 * SIZE(X)
sra N, 2, $1
addq INCX, INCX, INCX
fabs $f20, $f20
fabs $f21, $f21
addt $f20, $f21, $f0
ble $1, $L15
.align 4
/*
 * Pass 1 setup: four accumulators ($f0..$f3) seeded with the first
 * magnitude.  Element 0 is already in $f20/$f21; elements 1..3 are
 * loaded into $f22..$f27, and absolute values go to $f8..$f15.
 */
lda $1, -1($1)
unop
addq X, INCX, X
unop
LD $f22, 0 * SIZE(X)
fmov $f0, $f1
LD $f23, 1 * SIZE(X)
addq X, INCX, X
LD $f24, 0 * SIZE(X)
fmov $f0, $f2
LD $f25, 1 * SIZE(X)
addq X, INCX, X
LD $f26, 0 * SIZE(X)
fmov $f0, $f3
LD $f27, 1 * SIZE(X)
addq X, INCX, X
fabs $f20, $f8
fabs $f21, $f9
fabs $f22, $f10
fabs $f23, $f11
fabs $f24, $f12
fabs $f25, $f13
fabs $f26, $f14
fabs $f27, $f15
ble $1, $L14
.align 4
/* At least one more group: preload its four elements. */
LD $f20, 0 * SIZE(X)
LD $f21, 1 * SIZE(X)
lda $1, -1($1)
addq X, INCX, X
LD $f22, 0 * SIZE(X)
LD $f23, 1 * SIZE(X)
unop
addq X, INCX, X
LD $f24, 0 * SIZE(X)
LD $f25, 1 * SIZE(X)
unop
addq X, INCX, X
LD $f26, 0 * SIZE(X)
LD $f27, 1 * SIZE(X)
addq X, INCX, X
ble $1, $L13
.align 4
/*
 * Pass 1 main loop, three groups in flight: sum the absolute values of
 * the oldest group ($f8..$f15 -> $f16..$f19), take absolute values of
 * the group already loaded, and load the following group.  Each addt
 * reads its $f8..$f15 inputs before the fabs that overwrites them, and
 * each fabs reads $f20..$f27 before the LD that reloads it.
 */
$L12:
addt $f8, $f9, $f16
unop
fabs $f20, $f8
/* Destination is the zero register, so the loaded value is discarded;
   presumably a prefetch hint. */
ldl $31, 64 * SIZE(X)
addt $f10, $f11, $f17
unop
fabs $f21, $f9
LD $f20, 0 * SIZE(X)
addt $f12, $f13, $f18
LD $f21, 1 * SIZE(X)
fabs $f22, $f10
addq X, INCX, X
addt $f14, $f15, $f19
LD $f22, 0 * SIZE(X)
fabs $f23, $f11
unop
CMPLT($f0, $f16), $f4
LD $f23, 1 * SIZE(X)
fabs $f24, $f12
addq X, INCX, X
CMPLT($f1, $f17), $f5
LD $f24, 0 * SIZE(X)
fabs $f25, $f13
unop
CMPLT($f2, $f18), $f6
LD $f25, 1 * SIZE(X)
fabs $f26, $f14
addq X, INCX, X
CMPLT($f3, $f19), $f7
LD $f26, 0 * SIZE(X)
fabs $f27, $f15
unop
fcmovne $f4, $f16, $f0
LD $f27, 1 * SIZE(X)
addq X, INCX, X
lda $1, -1($1) # i --
fcmovne $f5, $f17, $f1
fcmovne $f6, $f18, $f2
fcmovne $f7, $f19, $f3
bgt $1,$L12
.align 4
/* Drain step 1: finish the oldest group and take absolute values of the
   last loaded group; no further loads. */
$L13:
addt $f8, $f9, $f16
fabs $f20, $f8
addt $f10, $f11, $f17
fabs $f21, $f9
addt $f12, $f13, $f18
fabs $f22, $f10
addt $f14, $f15, $f19
fabs $f23, $f11
CMPLT($f0, $f16), $f4
fabs $f24, $f12
CMPLT($f1, $f17), $f5
fabs $f25, $f13
CMPLT($f2, $f18), $f6
fabs $f26, $f14
CMPLT($f3, $f19), $f7
fabs $f27, $f15
fcmovne $f4, $f16, $f0
fcmovne $f5, $f17, $f1
fcmovne $f6, $f18, $f2
fcmovne $f7, $f19, $f3
.align 4
/* Drain step 2: process the final group held in $f8..$f15, then fold the
   four accumulators 4 -> 2 -> 1 into $f0. */
$L14:
addt $f8, $f9, $f16
addt $f10, $f11, $f17
addt $f12, $f13, $f18
addt $f14, $f15, $f19
CMPLT($f0, $f16), $f4
CMPLT($f1, $f17), $f5
CMPLT($f2, $f18), $f6
CMPLT($f3, $f19), $f7
fcmovne $f4, $f16, $f0
fcmovne $f5, $f17, $f1
fcmovne $f6, $f18, $f2
fcmovne $f7, $f19, $f3
CMPLT($f0, $f1), $f16
CMPLT($f2, $f3), $f17
fcmovne $f16, $f1, $f0
fcmovne $f17, $f3, $f2
CMPLT($f0, $f2), $f16
fcmovne $f16, $f2, $f0
.align 4
/*
 * Pass 1 remainder: n & 3 elements, one at a time.  When n < 4 the
 * unrolled part was skipped and X still points at element 0, so that
 * element is simply re-evaluated.
 */
$L15:
and N, 3, $1
unop
unop
ble $1, $L20
.align 4
$L16:
LD $f20, 0 * SIZE(X)
LD $f21, 1 * SIZE(X)
unop
addq X, INCX, X
fabs $f20, $f29
fabs $f21, $f30
addt $f29, $f30, $f29
CMPLT($f0, $f29), $f16
fcmovne $f16, $f29, $f0
lda $1, -1($1) # i --
bgt $1, $L16
.align 4
/*
 * Pass 2: recompute magnitudes from XX and stop at the first one equal
 * to $f0.  $1 = n / 4 groups are scanned unrolled; the rest is left to
 * the one-at-a-time scan at $L40.
 */
$L20:
sra N, 2, $1
ble $1, $L40
.align 4
LD $f10, 0 * SIZE(XX)
LD $f11, 1 * SIZE(XX)
addq XX, INCX, XX
LD $f12, 0 * SIZE(XX)
LD $f13, 1 * SIZE(XX)
addq XX, INCX, XX
LD $f14, 0 * SIZE(XX)
LD $f15, 1 * SIZE(XX)
addq XX, INCX, XX
LD $f16, 0 * SIZE(XX)
LD $f17, 1 * SIZE(XX)
addq XX, INCX, XX
fabs $f10, $f18
fabs $f11, $f19
fabs $f12, $f20
fabs $f13, $f21
lda $1, -1($1)
ble $1, $L23
.align 4
/*
 * Unrolled search loop: finish the absolute values of the current group
 * while loading the next one, form the four magnitudes in $f4..$f7,
 * compare each with $f0, then bump $0 and exit on the first hit.
 */
$L22:
LD $f10, 0 * SIZE(XX)
fabs $f14, $f22
LD $f11, 1 * SIZE(XX)
addq XX, INCX, XX
LD $f12, 0 * SIZE(XX)
fabs $f15, $f23
LD $f13, 1 * SIZE(XX)
addq XX, INCX, XX
LD $f14, 0 * SIZE(XX)
fabs $f16, $f24
LD $f15, 1 * SIZE(XX)
addq XX, INCX, XX
LD $f16, 0 * SIZE(XX)
fabs $f17, $f25
LD $f17, 1 * SIZE(XX)
addq XX, INCX, XX
addt $f18, $f19, $f4
addt $f20, $f21, $f5
addt $f22, $f23, $f6
addt $f24, $f25, $f7
cmpteq $f0, $f4, $f26
cmpteq $f0, $f5, $f27
cmpteq $f0, $f6, $f28
cmpteq $f0, $f7, $f29
fabs $f10, $f18
lda $0, 1($0)
lda $1, -1($1) # i --
fbne $f26, $End
fabs $f11, $f19
lda $0, 1($0)
unop
fbne $f27, $End
fabs $f12, $f20
lda $0, 1($0)
unop
fbne $f28, $End
fabs $f13, $f21
lda $0, 1($0)
fbne $f29, $End
bgt $1, $L22
.align 4
/* Test the last loaded group of four without loading any further. */
$L23:
fabs $f14, $f22
fabs $f15, $f23
fabs $f16, $f24
fabs $f17, $f25
addt $f18, $f19, $f4
addt $f20, $f21, $f5
addt $f22, $f23, $f6
addt $f24, $f25, $f7
cmpteq $f0, $f4, $f26
cmpteq $f0, $f5, $f27
cmpteq $f0, $f6, $f28
cmpteq $f0, $f7, $f29
lda $0, 1($0)
fbne $f26, $End
lda $0, 1($0)
fbne $f27, $End
lda $0, 1($0)
fbne $f28, $End
lda $0, 1($0)
fbne $f29, $End
.align 4
/*
 * One-at-a-time search.  The only exit is a successful compare; there is
 * no counter here (see the NOTE(review) at the top of the routine).
 */
$L40:
LD $f10, 0 * SIZE(XX)
LD $f11, 1 * SIZE(XX)
addq XX, INCX, XX
fabs $f10, $f18
fabs $f11, $f19
addt $f18, $f19, $f18
cmpteq $f0, $f18, $f2
lda $0, 1($0)
fbne $f2, $End
br $31, $L40
.align 4
/* Common exit: reload $f2..$f9, close the frame; result index is in $0. */
$End:
ldt $f2, 0($sp)
ldt $f3, 8($sp)
ldt $f4, 16($sp)
ldt $f5, 24($sp)
ldt $f6, 32($sp)
ldt $f7, 40($sp)
ldt $f8, 48($sp)
ldt $f9, 56($sp)
lda $sp, STACKSIZE($sp)
ret
EPILOGUE

76
kernel/alpha/lsame.S Normal file
View File

@@ -0,0 +1,76 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#include "version.h"
/*
 * lsame_: case-insensitive comparison of two single characters.
 *
 * In:   $16, $17 = addresses of the two characters
 * Out:  $0 = 1 when the characters match after folding, 0 otherwise
 * Uses: $1..$6 as scratch
 *
 * Folding rule: a byte value above 96 has 32 subtracted from it; byte
 * values 0..96 are compared unchanged.
 */
.set noat
.set noreorder
.text
.align 5
.globl lsame_
.ent lsame_
lsame_:
.frame $sp,0,$26,0
#ifdef PROFILE
ldgp $gp, 0($27)
lda $28, _mcount
jsr $28, ($28), _mcount
.prologue 1
#else
.prologue 0
#endif
/* Fetch the aligned quadword holding each character, then pick the byte
   selected by the low address bits. */
ldq_u $1, 0($16)
ldq_u $2, 0($17)
extbl $1, $16, $1
extbl $2, $17, $2
/* $3/$4 = 1 when the character is <= 96, i.e. must be left alone. */
cmple $1, 96, $3
cmple $2, 96, $4
/* Candidate folded values. */
subl $1, 32, $5
subl $2, 32, $6
/* Take the folded value only when the "leave alone" flag is clear. */
cmoveq $3, $5, $1
cmoveq $4, $6, $2
cmpeq $1, $2, $0
.align 4
ret
.end lsame_
.ident VERSION

227
kernel/alpha/max.S Normal file
View File

@@ -0,0 +1,227 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/*
 * Extreme-value kernel (Alpha): leaves the largest element of the vector
 * in $f0, or the smallest when USE_MIN is defined.  $f0 is 0.0 when
 * n <= 0 or incx <= 0.
 *
 * A STACKSIZE-byte frame is reserved on entry and released at $End;
 * nothing in this routine stores into it.
 */
/* Integer argument registers: element count, walking pointer, stride. */
#define N $16
#define X $17
#define INCX $18
/*
 * CMPLT(cur, new) is non-zero when "new" should replace "cur":
 * cur < new normally, new < cur when USE_MIN is defined.
 */
#ifndef USE_MIN
#define CMPLT(a, b) cmptlt a, b
#else
#define CMPLT(a, b) cmptlt b, a
#endif
#define STACKSIZE 8 * 8
PROLOGUE
PROFCODE
.frame $sp, STACKSIZE, $26, 0
/* With F_INTERFACE the count and stride arrive as pointers and are
   dereferenced here. */
#ifdef F_INTERFACE
ldl N, 0(N) # n
ldl INCX, 0(INCX) # incx
#endif
lda $sp, -STACKSIZE($sp)
nop
.align 4
/* $0 = (n > 0) & (incx > 0); $1 = n / 8 = number of unrolled groups. */
cmplt $31, N, $2
cmplt $31, INCX, $3
/* SXADDQ comes from common.h (not visible here); presumably it scales
   INCX from elements to bytes -- confirm. */
SXADDQ INCX, $31, INCX
and $2, $3, $0
sra N, 3, $1
fclr $f0
unop
beq $0, $End # if (n <= 0) or (incx <= 0) return
.align 4
/* Seed the running extreme with the first element. */
LD $f0, 0 * SIZE(X)
unop
unop
ble $1, $L15
.align 4
/*
 * Setup: eight independent accumulators ($f0, $f1, $f10..$f15), all
 * seeded with element 0.  $f20 is a copy of element 0 and $f21..$f27
 * receive the next seven elements, forming the first group.
 */
fmov $f0, $f1
addq X, INCX, X
fmov $f0, $f10
lda $1, -1($1)
LD $f21, 0 * SIZE(X)
fmov $f0, $f11
addq X, INCX, X
fmov $f0, $f12
LD $f22, 0 * SIZE(X)
fmov $f0, $f13
addq X, INCX, X
fmov $f0, $f14
LD $f23, 0 * SIZE(X)
fmov $f0, $f15
addq X, INCX, X
fmov $f0, $f20
LD $f24, 0 * SIZE(X)
addq X, INCX, X
LD $f25, 0 * SIZE(X)
addq X, INCX, X
LD $f26, 0 * SIZE(X)
addq X, INCX, X
LD $f27, 0 * SIZE(X)
addq X, INCX, X
/* Start the compare pipeline for the first four lanes. */
CMPLT($f0, $f20), $f16
CMPLT($f1, $f21), $f17
CMPLT($f10, $f22), $f18
CMPLT($f11, $f23), $f19
ble $1, $L13
.align 4
/*
 * Main loop.  Each fcmovne commits the compare issued earlier, then the
 * same data register is reloaded with an element of the next group, so
 * loads, compares and conditional moves overlap.
 */
$L12:
fcmovne $f16, $f20, $f0
LD $f20, 0 * SIZE(X)
CMPLT($f12, $f24), $f16
addq X, INCX, X
fcmovne $f17, $f21, $f1
LD $f21, 0 * SIZE(X)
CMPLT($f13, $f25), $f17
addq X, INCX, X
fcmovne $f18, $f22, $f10
LD $f22, 0 * SIZE(X)
CMPLT($f14, $f26), $f18
addq X, INCX, X
fcmovne $f19, $f23, $f11
LD $f23, 0 * SIZE(X)
CMPLT($f15, $f27), $f19
addq X, INCX, X
fcmovne $f16, $f24, $f12
LD $f24, 0 * SIZE(X)
CMPLT($f0, $f20), $f16
addq X, INCX, X
fcmovne $f17, $f25, $f13
LD $f25, 0 * SIZE(X)
CMPLT($f1, $f21), $f17
addq X, INCX, X
fcmovne $f18, $f26, $f14
LD $f26, 0 * SIZE(X)
CMPLT($f10, $f22), $f18
addq X, INCX, X
fcmovne $f19, $f27, $f15
LD $f27, 0 * SIZE(X)
CMPLT($f11, $f23), $f19
lda $1, -1($1) # i --
addq X, INCX, X
unop
unop
bgt $1,$L12
.align 4
/*
 * Drain: commit the last loaded group (no further loads), then fold the
 * eight accumulators pairwise 8 -> 4 -> 2 -> 1 into $f0.
 */
$L13:
fcmovne $f16, $f20, $f0
CMPLT($f12, $f24), $f16
fcmovne $f17, $f21, $f1
CMPLT($f13, $f25), $f17
fcmovne $f18, $f22, $f10
CMPLT($f14, $f26), $f18
fcmovne $f19, $f23, $f11
CMPLT($f15, $f27), $f19
fcmovne $f16, $f24, $f12
CMPLT($f0, $f1), $f16
fcmovne $f17, $f25, $f13
CMPLT($f10, $f11), $f17
fcmovne $f18, $f26, $f14
CMPLT($f12, $f13), $f18
fcmovne $f19, $f27, $f15
CMPLT($f14, $f15), $f19
fcmovne $f16, $f1, $f0
fcmovne $f17, $f11, $f10
fcmovne $f18, $f13, $f12
fcmovne $f19, $f15, $f14
CMPLT($f0, $f10), $f16
CMPLT($f12, $f14), $f17
fcmovne $f16, $f10, $f0
fcmovne $f17, $f14, $f12
CMPLT($f0, $f12), $f16
fcmovne $f16, $f12, $f0
.align 4
/*
 * Remainder: n & 7 elements, one at a time.  When n < 8 the unrolled
 * part was skipped and X still points at element 0, so that element is
 * simply compared against itself once more.
 */
$L15:
and N, 7, $1
unop
unop
ble $1, $End
.align 4
$L16:
LD $f20, 0 * SIZE(X)
addq X, INCX, X
CMPLT($f0, $f20), $f16
fcmovne $f16, $f20, $f0
lda $1, -1($1) # i --
bgt $1, $L16
.align 4
/* Release the frame; the result is in $f0. */
$End:
lda $sp, STACKSIZE($sp)
ret
EPILOGUE

624
kernel/alpha/rot.S Normal file
View File

@@ -0,0 +1,624 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/*
 * Plane-rotation kernel (Alpha).  For every element pair:
 *
 *     x_new = C * x + S * y
 *     y_new = C * y - S * x
 *
 * and the results are written back over x and y.  C is taken from $f21
 * and S is loaded from 0($sp).  $0 is cleared before returning.
 *
 * LD/ST/MUL/ADD/SUB and SXADDQ are macros from common.h (not visible
 * here); presumably they select the single- or double-precision
 * instruction and, for SXADDQ, a size-scaled add -- confirm.
 *
 * Two paths:
 *   - both strides equal to 1: software-pipelined loop, 8 pairs per
 *     iteration, then a one-pair-at-a-time tail ($L16);
 *   - otherwise ($L50): strided loop, 8 pairs per iteration with the
 *     stores trailing the loads through XX/YY, then a tail ($L56).
 */
/* Integer argument registers. */
#define N $16
#define X $17
#define INCX $18
#define Y $19
#define INCY $20
/* Loop counter. */
#define I $21
/* Store pointers of the strided path; $23/$24 are first used as scratch
   for the stride tests below. */
#define XX $23
#define YY $24
/* Rotation coefficients. */
#define C $f10
#define S $f11
/* Prefetch distance, in elements, used inside the unit-stride loop. */
#define PREFETCH_SIZE 80
PROLOGUE
PROFCODE
.frame $sp, 0, $26, 0
#ifndef PROFILE
.prologue 0
#else
.prologue 1
#endif
/* Pick up the coefficients, then choose a path: nothing to do for
   n <= 0; the strided path unless both strides are exactly 1. */
fmov $f21, C
LD S, 0($sp)
cmpeq INCX, 1, $23
cmpeq INCY, 1, $24
ble N, $L998
and $23, $24, $23
beq $23, $L50
/* Unit-stride path: I = n / 8 unrolled iterations. */
sra N, 3, I
ble I, $L15
/*
 * Pipeline prologue: load pairs 0..3, start the products for pairs 0
 * and 1, and begin fetching pair 4.  On entry to $L12/$L13 the results
 * for pair 0 are complete in $f22 (x) and $f24 (y); the four products
 * for pair 1 are in $f25..$f28.
 */
LD $f12, 0*SIZE(X)
LD $f13, 0*SIZE(Y)
LD $f14, 1*SIZE(X)
LD $f15, 1*SIZE(Y)
LD $f16, 2*SIZE(X)
LD $f17, 2*SIZE(Y)
LD $f18, 3*SIZE(X)
LD $f19, 3*SIZE(Y)
MUL C, $f12, $f21
unop
MUL S, $f13, $f22
MUL C, $f13, $f23
LD $f13, 4*SIZE(Y)
MUL S, $f12, $f24
LD $f12, 4*SIZE(X)
MUL C, $f14, $f25
lda I, -1(I)
MUL S, $f15, $f26
ADD $f21, $f22, $f22
MUL C, $f15, $f27
LD $f15, 5*SIZE(Y)
MUL S, $f14, $f28
SUB $f23, $f24, $f24
ble I, $L13
.align 4
/*
 * Steady-state loop, 8 pairs per iteration.  Stores of finished results,
 * products for the following pairs and loads of later inputs are
 * interleaved; each input register is reloaded only after its last use.
 * X and Y are advanced by 8 elements part-way through the iteration,
 * which is why the final stores use offset -1 and the last loads use
 * offsets 4 and 5 again.
 */
$L12:
MUL C, $f16, $f21
/* Destination is the zero register, so the loaded value is discarded;
   presumably a prefetch hint. */
lds $f31, (PREFETCH_SIZE) * SIZE(X)
unop
LD $f14, 5*SIZE(X)
ST $f22, 0*SIZE(X)
MUL S, $f17, $f22
unop
ADD $f25, $f26, $f26
MUL C, $f17, $f23
lds $f31, (PREFETCH_SIZE) * SIZE(Y)
unop
LD $f17, 6*SIZE(Y)
ST $f24, 0*SIZE(Y)
MUL S, $f16, $f24
unop
SUB $f27, $f28, $f28
MUL C, $f18, $f25
LD $f16, 6*SIZE(X)
unop
unop
ST $f26, 1*SIZE(X)
MUL S, $f19, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f19, $f27
unop
unop
LD $f19, 7*SIZE(Y)
ST $f28, 1*SIZE(Y)
MUL S, $f18, $f28
unop
SUB $f23, $f24, $f24
MUL C, $f12, $f21
LD $f18, 7*SIZE(X)
unop
unop
ST $f22, 2*SIZE(X)
unop
MUL S, $f13, $f22
ADD $f25, $f26, $f26
MUL C, $f13, $f23
LD $f13, 8*SIZE(Y)
unop
unop
ST $f24, 2*SIZE(Y)
MUL S, $f12, $f24
unop
SUB $f27, $f28, $f28
MUL C, $f14, $f25
LD $f12, 8*SIZE(X)
unop
unop
ST $f26, 3*SIZE(X)
MUL S, $f15, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f15, $f27
LD $f15, 9*SIZE(Y)
unop
unop
ST $f28, 3*SIZE(Y)
MUL S, $f14, $f28
unop
SUB $f23, $f24, $f24
MUL C, $f16, $f21
LD $f14, 9*SIZE(X)
unop
unop
ST $f22, 4*SIZE(X)
MUL S, $f17, $f22
unop
ADD $f25, $f26, $f26
MUL C, $f17, $f23
LD $f17, 10*SIZE(Y)
unop
unop
ST $f24, 4*SIZE(Y)
MUL S, $f16, $f24
unop
SUB $f27, $f28, $f28
MUL C, $f18, $f25
LD $f16, 10*SIZE(X)
unop
unop
ST $f26, 5*SIZE(X)
MUL S, $f19, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f19, $f27
LD $f19, 11*SIZE(Y)
unop
unop
ST $f28, 5*SIZE(Y)
MUL S, $f18, $f28
lda I, -1(I)
SUB $f23, $f24, $f24
MUL C, $f12, $f21
LD $f18, 11*SIZE(X)
unop
unop
ST $f22, 6*SIZE(X)
MUL S, $f13, $f22
unop
ADD $f25, $f26, $f26
MUL C, $f13, $f23
LD $f13, 12*SIZE(Y)
lda X, 8*SIZE(X)
unop
ST $f24, 6*SIZE(Y)
MUL S, $f12, $f24
unop
SUB $f27, $f28, $f28
MUL C, $f14, $f25
LD $f12, 4*SIZE(X)
lda Y, 8*SIZE(Y)
unop
ST $f26, -1*SIZE(X)
MUL S, $f15, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f15, $f27
LD $f15, 5*SIZE(Y)
unop
unop
ST $f28, -1*SIZE(Y)
MUL S, $f14, $f28
SUB $f23, $f24, $f24
bgt I, $L12
.align 4
/*
 * Pipeline epilogue: finish the last group of 8 pairs.  Only the inputs
 * still missing from this group (offsets 5..7) are loaded; nothing past
 * the group is read.  X and Y are advanced by 8 elements at the end.
 */
$L13:
MUL C, $f16, $f21
LD $f14, 5*SIZE(X)
unop
unop
ST $f22, 0*SIZE(X)
MUL S, $f17, $f22
unop
ADD $f25, $f26, $f26
MUL C, $f17, $f23
unop
unop
LD $f17, 6*SIZE(Y)
ST $f24, 0*SIZE(Y)
MUL S, $f16, $f24
LD $f16, 6*SIZE(X)
SUB $f27, $f28, $f28
MUL C, $f18, $f25
unop
unop
unop
ST $f26, 1*SIZE(X)
MUL S, $f19, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f19, $f27
unop
unop
LD $f19, 7*SIZE(Y)
ST $f28, 1*SIZE(Y)
MUL S, $f18, $f28
LD $f18, 7*SIZE(X)
SUB $f23, $f24, $f24
MUL C, $f12, $f21
unop
unop
unop
ST $f22, 2*SIZE(X)
unop
MUL S, $f13, $f22
ADD $f25, $f26, $f26
MUL C, $f13, $f23
unop
unop
unop
ST $f24, 2*SIZE(Y)
MUL S, $f12, $f24
unop
SUB $f27, $f28, $f28
MUL C, $f14, $f25
unop
unop
unop
ST $f26, 3*SIZE(X)
MUL S, $f15, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f15, $f27
unop
unop
unop
ST $f28, 3*SIZE(Y)
MUL S, $f14, $f28
unop
SUB $f23, $f24, $f24
MUL C, $f16, $f21
unop
unop
unop
ST $f22, 4*SIZE(X)
MUL S, $f17, $f22
unop
ADD $f25, $f26, $f26
MUL C, $f17, $f23
unop
unop
unop
ST $f24, 4*SIZE(Y)
MUL S, $f16, $f24
unop
SUB $f27, $f28, $f28
MUL C, $f18, $f25
unop
unop
unop
ST $f26, 5*SIZE(X)
MUL S, $f19, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f19, $f27
unop
unop
unop
ST $f28, 5*SIZE(Y)
MUL S, $f18, $f28
unop
SUB $f23, $f24, $f24
ST $f22, 6*SIZE(X)
ADD $f25, $f26, $f26
ST $f24, 6*SIZE(Y)
SUB $f27, $f28, $f28
ST $f26, 7*SIZE(X)
lda X, 8*SIZE(X)
ST $f28, 7*SIZE(Y)
lda Y, 8*SIZE(Y)
.align 4
/* Unit-stride tail: the remaining n & 7 pairs, one per iteration. */
$L15:
and N, 7, I
ble I, $L998
.align 4
$L16:
LD $f12, 0*SIZE(X)
LD $f13, 0*SIZE(Y)
MUL C, $f12, $f21
MUL S, $f13, $f22
MUL C, $f13, $f23
MUL S, $f12, $f24
ADD $f21, $f22, $f25
SUB $f23, $f24, $f26
lda I, -1(I)
ST $f25, 0*SIZE(X)
lda X, 1 * SIZE(X)
ST $f26, 0*SIZE(Y)
lda Y, 1 * SIZE(Y)
bgt I, $L16
.align 4
/* Exit of the unit-stride path (also taken when n <= 0). */
$L998:
clr $0
ret
.align 4
/*
 * Strided path.  X/Y run ahead as load pointers while XX/YY follow as
 * store pointers over the same elements.  I = n / 8 iterations, each
 * handling two batches of four pairs.
 */
$L50:
mov X, XX
mov Y, YY
sra N, 3, I
ble I, $L55
.align 4
$L51:
/* First batch: load four pairs. */
LD $f12, 0*SIZE(X)
SXADDQ INCX, X, X
LD $f13, 0*SIZE(Y)
SXADDQ INCY, Y, Y
LD $f14, 0*SIZE(X)
SXADDQ INCX, X, X
LD $f15, 0*SIZE(Y)
SXADDQ INCY, Y, Y
LD $f16, 0*SIZE(X)
SXADDQ INCX, X, X
LD $f17, 0*SIZE(Y)
SXADDQ INCY, Y, Y
LD $f18, 0*SIZE(X)
SXADDQ INCX, X, X
LD $f19, 0*SIZE(Y)
SXADDQ INCY, Y, Y
/* Rotate and store each of the four pairs in turn. */
MUL C, $f12, $f21
MUL S, $f13, $f22
MUL C, $f13, $f23
MUL S, $f12, $f24
ADD $f21, $f22, $f22
SUB $f23, $f24, $f24
ST $f22, 0*SIZE(XX)
SXADDQ INCX, XX, XX
ST $f24, 0*SIZE(YY)
SXADDQ INCY, YY, YY
MUL C, $f14, $f25
MUL S, $f15, $f26
MUL C, $f15, $f27
MUL S, $f14, $f28
ADD $f25, $f26, $f26
SUB $f27, $f28, $f28
ST $f26, 0*SIZE(XX)
SXADDQ INCX, XX, XX
ST $f28, 0*SIZE(YY)
SXADDQ INCY, YY, YY
MUL C, $f16, $f21
MUL S, $f17, $f22
MUL C, $f17, $f23
MUL S, $f16, $f24
ADD $f21, $f22, $f22
SUB $f23, $f24, $f24
ST $f22, 0*SIZE(XX)
SXADDQ INCX, XX, XX
ST $f24, 0*SIZE(YY)
SXADDQ INCY, YY, YY
MUL C, $f18, $f25
MUL S, $f19, $f26
MUL C, $f19, $f27
MUL S, $f18, $f28
ADD $f25, $f26, $f26
SUB $f27, $f28, $f28
ST $f26, 0*SIZE(XX)
SXADDQ INCX, XX, XX
ST $f28, 0*SIZE(YY)
SXADDQ INCY, YY, YY
/* Second batch: same sequence for the next four pairs. */
LD $f12, 0*SIZE(X)
SXADDQ INCX, X, X
LD $f13, 0*SIZE(Y)
SXADDQ INCY, Y, Y
LD $f14, 0*SIZE(X)
SXADDQ INCX, X, X
LD $f15, 0*SIZE(Y)
SXADDQ INCY, Y, Y
LD $f16, 0*SIZE(X)
SXADDQ INCX, X, X
LD $f17, 0*SIZE(Y)
SXADDQ INCY, Y, Y
LD $f18, 0*SIZE(X)
SXADDQ INCX, X, X
LD $f19, 0*SIZE(Y)
SXADDQ INCY, Y, Y
MUL C, $f12, $f21
MUL S, $f13, $f22
MUL C, $f13, $f23
MUL S, $f12, $f24
ADD $f21, $f22, $f22
SUB $f23, $f24, $f24
ST $f22, 0*SIZE(XX)
SXADDQ INCX, XX, XX
ST $f24, 0*SIZE(YY)
SXADDQ INCY, YY, YY
MUL C, $f14, $f25
MUL S, $f15, $f26
MUL C, $f15, $f27
MUL S, $f14, $f28
ADD $f25, $f26, $f26
SUB $f27, $f28, $f28
ST $f26, 0*SIZE(XX)
SXADDQ INCX, XX, XX
ST $f28, 0*SIZE(YY)
SXADDQ INCY, YY, YY
MUL C, $f16, $f21
MUL S, $f17, $f22
MUL C, $f17, $f23
MUL S, $f16, $f24
ADD $f21, $f22, $f22
SUB $f23, $f24, $f24
ST $f22, 0*SIZE(XX)
SXADDQ INCX, XX, XX
ST $f24, 0*SIZE(YY)
SXADDQ INCY, YY, YY
MUL C, $f18, $f25
MUL S, $f19, $f26
MUL C, $f19, $f27
MUL S, $f18, $f28
ADD $f25, $f26, $f26
SUB $f27, $f28, $f28
ST $f26, 0*SIZE(XX)
SXADDQ INCX, XX, XX
ST $f28, 0*SIZE(YY)
SXADDQ INCY, YY, YY
lda I, -1(I)
bgt I, $L51
.align 4
/* Strided tail: the remaining n & 7 pairs; loads and stores both go
   through X/Y here, XX/YY are no longer used. */
$L55:
and N, 7, I
ble I, $L999
.align 4
$L56:
LD $f12, 0*SIZE(X)
LD $f13, 0*SIZE(Y)
MUL C, $f12, $f21
MUL S, $f13, $f22
MUL C, $f13, $f23
MUL S, $f12, $f24
ADD $f21, $f22, $f25
SUB $f23, $f24, $f26
lda I, -1(I)
ST $f25, 0*SIZE(X)
SXADDQ INCX, X, X
ST $f26, 0*SIZE(Y)
SXADDQ INCY, Y, Y
bgt I, $L56
.align 4
/* Exit of the strided path. */
$L999:
clr $0
ret
EPILOGUE

480
kernel/alpha/scal.S Normal file
View File

@@ -0,0 +1,480 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/*
 * In-place vector scale: every element of the vector at X is multiplied
 * by ALPHA and written back, for N elements.
 *
 *   N     ($16)  element count; nothing is done when N <= 0
 *   ALPHA ($f19) scale factor
 *   X     ($20)  vector pointer (also the load pointer)
 *   INCX  ($21)  stride in elements; INCX == 1 selects the contiguous path
 *
 * PROLOGUE, PROFCODE, EPILOGUE, LD, ST, MUL, SIZE and SXADDQ come from
 * common.h and are not visible here.
 * NOTE(review): SXADDQ is presumably a scaled add (INCX * SIZE + base);
 * confirm against common.h.
 */
/* Offset, in elements, of the prefetch-style load issued ahead of X. */
#define PREFETCHSIZE 88
#define N $16
#define X $20
#define INCX $21
/* XX: trailing store pointer, used only by the strided path. */
#define XX $18
/* I: loop trip counter. */
#define I $19
#define ALPHA $f19
/* s0..s3 are defined but not referenced anywhere in this routine. */
#define s0 $f0
#define s1 $f1
#define s2 $f10
#define s3 $f11
/* a0..a7: values loaded from the vector. */
#define a0 $f12
#define a1 $f13
#define a2 $f14
#define a3 $f15
#define a4 $f16
#define a5 $f17
#define a6 $f18
#define a7 $f21
/* t0..t3: products waiting to be stored. */
#define t0 $f22
#define t1 $f23
#define t2 $f24
#define t3 $f25
PROLOGUE
PROFCODE
/* Keep a second copy of the base pointer for the strided stores. */
mov X, XX
/* N <= 0: nothing to do. */
ble N, $L999
/* $0 = (INCX == 1); any other stride goes to the generic path at $L20. */
cmpeq INCX, 1, $0
beq $0, $L20
#ifndef DOUBLE
/* Contiguous path, non-DOUBLE build: 16 elements per trip. */
sra N, 4, I
ble I, $L15
/* Pipeline fill: load elements 0..15, store the products of 0..7 and */
/* leave the products of 8..11 pending in t0..t3. */
LD a0, 0 * SIZE(X)
LD a1, 1 * SIZE(X)
LD a2, 2 * SIZE(X)
LD a3, 3 * SIZE(X)
LD a4, 4 * SIZE(X)
MUL a0, ALPHA, t0
LD a5, 5 * SIZE(X)
MUL a1, ALPHA, t1
LD a6, 6 * SIZE(X)
MUL a2, ALPHA, t2
LD a7, 7 * SIZE(X)
MUL a3, ALPHA, t3
ST t0, 0 * SIZE(X)
MUL a4, ALPHA, t0
ST t1, 1 * SIZE(X)
MUL a5, ALPHA, t1
ST t2, 2 * SIZE(X)
MUL a6, ALPHA, t2
ST t3, 3 * SIZE(X)
MUL a7, ALPHA, t3
LD a0, 8 * SIZE(X)
LD a1, 9 * SIZE(X)
LD a2, 10 * SIZE(X)
LD a3, 11 * SIZE(X)
ST t0, 4 * SIZE(X)
MUL a0, ALPHA, t0
ST t1, 5 * SIZE(X)
MUL a1, ALPHA, t1
ST t2, 6 * SIZE(X)
MUL a2, ALPHA, t2
ST t3, 7 * SIZE(X)
MUL a3, ALPHA, t3
LD a4, 12 * SIZE(X)
LD a5, 13 * SIZE(X)
LD a6, 14 * SIZE(X)
LD a7, 15 * SIZE(X)
lda I, -1(I)
/* Only one 16-element group: skip the steady-state loop and drain. */
ble I, $L13
.align 4
$L12:
/* Steady state: store elements 8..23 relative to X while loading */
/* 16..31, then advance X by 16 so the same offsets apply next trip. */
ST t0, 8 * SIZE(X)
MUL a4, ALPHA, t0
ST t1, 9 * SIZE(X)
MUL a5, ALPHA, t1
ST t2, 10 * SIZE(X)
MUL a6, ALPHA, t2
ST t3, 11 * SIZE(X)
MUL a7, ALPHA, t3
LD a0, 16 * SIZE(X)
LD a1, 17 * SIZE(X)
LD a2, 18 * SIZE(X)
LD a3, 19 * SIZE(X)
ST t0, 12 * SIZE(X)
MUL a0, ALPHA, t0
ST t1, 13 * SIZE(X)
MUL a1, ALPHA, t1
ST t2, 14 * SIZE(X)
MUL a2, ALPHA, t2
ST t3, 15 * SIZE(X)
MUL a3, ALPHA, t3
LD a4, 20 * SIZE(X)
LD a5, 21 * SIZE(X)
LD a6, 22 * SIZE(X)
LD a7, 23 * SIZE(X)
ST t0, 16 * SIZE(X)
MUL a4, ALPHA, t0
ST t1, 17 * SIZE(X)
MUL a5, ALPHA, t1
ST t2, 18 * SIZE(X)
MUL a6, ALPHA, t2
ST t3, 19 * SIZE(X)
MUL a7, ALPHA, t3
LD a0, 24 * SIZE(X)
LD a1, 25 * SIZE(X)
LD a2, 26 * SIZE(X)
LD a3, 27 * SIZE(X)
ST t0, 20 * SIZE(X)
MUL a0, ALPHA, t0
ST t1, 21 * SIZE(X)
MUL a1, ALPHA, t1
ST t2, 22 * SIZE(X)
MUL a2, ALPHA, t2
ST t3, 23 * SIZE(X)
MUL a3, ALPHA, t3
LD a4, 28 * SIZE(X)
LD a5, 29 * SIZE(X)
LD a6, 30 * SIZE(X)
LD a7, 31 * SIZE(X)
/* Load into $f31: the value is discarded, the access is issued only to */
/* touch the line PREFETCHSIZE elements ahead. */
lds $f31, PREFETCHSIZE * SIZE(X)
lda I, -1(I)
addq X, 16 * SIZE, X
bne I, $L12
.align 4
$L13:
/* Pipeline drain: store the pending products for elements 8..15 of the */
/* last group, then step X past it. */
ST t0, 8 * SIZE(X)
MUL a4, ALPHA, t0
ST t1, 9 * SIZE(X)
MUL a5, ALPHA, t1
ST t2, 10 * SIZE(X)
MUL a6, ALPHA, t2
ST t3, 11 * SIZE(X)
MUL a7, ALPHA, t3
ST t0, 12 * SIZE(X)
ST t1, 13 * SIZE(X)
ST t2, 14 * SIZE(X)
ST t3, 15 * SIZE(X)
addq X, 16 * SIZE, X
.align 4
$L15:
/* Remainder count for the 16-wide variant. */
and N, 15, I
#else
/* Contiguous path, DOUBLE build: 8 elements per trip. */
sra N, 3, I
ble I, $L15
/* Pipeline fill: load elements 0..7; products of 0..3 pending in t0..t3. */
LD a0, 0 * SIZE(X)
LD a1, 1 * SIZE(X)
LD a2, 2 * SIZE(X)
LD a3, 3 * SIZE(X)
LD a4, 4 * SIZE(X)
MUL a0, ALPHA, t0
LD a5, 5 * SIZE(X)
MUL a1, ALPHA, t1
LD a6, 6 * SIZE(X)
MUL a2, ALPHA, t2
LD a7, 7 * SIZE(X)
MUL a3, ALPHA, t3
lda I, -1(I)
ble I, $L13
.align 4
$L12:
ST t0, 0 * SIZE(X)
MUL a4, ALPHA, t0
ST t1, 1 * SIZE(X)
MUL a5, ALPHA, t1
ST t2, 2 * SIZE(X)
MUL a6, ALPHA, t2
ST t3, 3 * SIZE(X)
MUL a7, ALPHA, t3
LD a0, 8 * SIZE(X)
lda I, -1(I)
LD a1, 9 * SIZE(X)
/* X advances in the middle of the trip: the loads below use offsets */
/* 2..7 against the new base and the stores use -4..-1 to reach */
/* elements 4..7 of the old base. */
addq X, 8 * SIZE, X
LD a2, 2 * SIZE(X)
LD a3, 3 * SIZE(X)
ST t0, -4 * SIZE(X)
MUL a0, ALPHA, t0
ST t1, -3 * SIZE(X)
MUL a1, ALPHA, t1
ST t2, -2 * SIZE(X)
MUL a2, ALPHA, t2
ST t3, -1 * SIZE(X)
MUL a3, ALPHA, t3
LD a4, 4 * SIZE(X)
LD a5, 5 * SIZE(X)
LD a6, 6 * SIZE(X)
LD a7, 7 * SIZE(X)
/* Load into $f31: value discarded, issued only to touch memory ahead. */
lds $f31, PREFETCHSIZE * SIZE(X)
bne I, $L12
.align 4
$L13:
/* Pipeline drain for the last 8-element group. */
ST t0, 0 * SIZE(X)
MUL a4, ALPHA, t0
ST t1, 1 * SIZE(X)
MUL a5, ALPHA, t1
ST t2, 2 * SIZE(X)
MUL a6, ALPHA, t2
ST t3, 3 * SIZE(X)
MUL a7, ALPHA, t3
ST t0, 4 * SIZE(X)
ST t1, 5 * SIZE(X)
ST t2, 6 * SIZE(X)
ST t3, 7 * SIZE(X)
addq X, 8 * SIZE, X
.align 4
$L15:
/* Remainder count for the 8-wide variant. */
and N, 7, I
#endif
unop
unop
ble I, $L999
.align 4
$L17:
/* Contiguous remainder: one element per trip. */
LD a0, 0 * SIZE(X)
MUL a0, ALPHA, t0
ST t0, 0 * SIZE(X)
addq X, SIZE, X
lda I, -1(I)
bne I, $L17
ret
.align 4
$L20:
/* Strided path: 8 elements per trip. X is the load pointer and runs */
/* ahead; XX is the store pointer and follows with the same stride. */
sra N, 3, I
ble I, $L25
/* Pipeline fill: load 8 elements; products of the first 4 pending. */
LD a0, 0 * SIZE(X)
SXADDQ INCX, X, X
LD a1, 0 * SIZE(X)
SXADDQ INCX, X, X
LD a2, 0 * SIZE(X)
SXADDQ INCX, X, X
LD a3, 0 * SIZE(X)
SXADDQ INCX, X, X
LD a4, 0 * SIZE(X)
MUL a0, ALPHA, t0
lda I, -1(I)
SXADDQ INCX, X, X
LD a5, 0 * SIZE(X)
MUL a1, ALPHA, t1
SXADDQ INCX, X, X
unop
LD a6, 0 * SIZE(X)
MUL a2, ALPHA, t2
SXADDQ INCX, X, X
unop
LD a7, 0 * SIZE(X)
MUL a3, ALPHA, t3
SXADDQ INCX, X, X
ble I, $L23
.align 4
$L22:
/* Steady state: each step stores one pending product through XX, */
/* starts the next multiply and loads one new element through X. */
ST t0, 0 * SIZE(XX)
MUL a4, ALPHA, t0
/* Load into $f31: value discarded, issued only to touch memory ahead. */
lds $f31, PREFETCHSIZE * SIZE(X)
SXADDQ INCX, XX, XX
LD a0, 0 * SIZE(X)
SXADDQ INCX, X, X
lda I, -1(I)
unop
ST t1, 0 * SIZE(XX)
MUL a5, ALPHA, t1
SXADDQ INCX, XX, XX
unop
LD a1, 0 * SIZE(X)
SXADDQ INCX, X, X
ST t2, 0 * SIZE(XX)
MUL a6, ALPHA, t2
SXADDQ INCX, XX, XX
unop
LD a2, 0 * SIZE(X)
SXADDQ INCX, X, X
ST t3, 0 * SIZE(XX)
MUL a7, ALPHA, t3
SXADDQ INCX, XX, XX
unop
LD a3, 0 * SIZE(X)
SXADDQ INCX, X, X
ST t0, 0 * SIZE(XX)
MUL a0, ALPHA, t0
SXADDQ INCX, XX, XX
unop
LD a4, 0 * SIZE(X)
SXADDQ INCX, X, X
ST t1, 0 * SIZE(XX)
MUL a1, ALPHA, t1
SXADDQ INCX, XX, XX
unop
LD a5, 0 * SIZE(X)
SXADDQ INCX, X, X
ST t2, 0 * SIZE(XX)
MUL a2, ALPHA, t2
SXADDQ INCX, XX, XX
unop
LD a6, 0 * SIZE(X)
SXADDQ INCX, X, X
ST t3, 0 * SIZE(XX)
MUL a3, ALPHA, t3
SXADDQ INCX, XX, XX
unop
LD a7, 0 * SIZE(X)
SXADDQ INCX, X, X
unop
bne I, $L22
.align 4
$L23:
/* Pipeline drain: store the last 8 products; no further loads. */
ST t0, 0 * SIZE(XX)
MUL a4, ALPHA, t0
SXADDQ INCX, XX, XX
ST t1, 0 * SIZE(XX)
MUL a5, ALPHA, t1
SXADDQ INCX, XX, XX
ST t2, 0 * SIZE(XX)
MUL a6, ALPHA, t2
SXADDQ INCX, XX, XX
ST t3, 0 * SIZE(XX)
MUL a7, ALPHA, t3
SXADDQ INCX, XX, XX
ST t0, 0 * SIZE(XX)
SXADDQ INCX, XX, XX
ST t1, 0 * SIZE(XX)
SXADDQ INCX, XX, XX
ST t2, 0 * SIZE(XX)
SXADDQ INCX, XX, XX
ST t3, 0 * SIZE(XX)
SXADDQ INCX, XX, XX
.align 4
$L25:
/* Strided remainder count. */
and N, 7, I
unop
unop
ble I, $L999
.align 4
$L27:
/* Strided remainder: one element per trip, X and XX move together. */
LD a0, 0 * SIZE(X)
MUL a0, ALPHA, t0
ST t0, 0 * SIZE(XX)
SXADDQ INCX, X, X
SXADDQ INCX, XX, XX
lda I, -1(I)
bne I, $L27
.align 4
$L999:
ret
EPILOGUE

431
kernel/alpha/snrm2.S Normal file
View File

@@ -0,0 +1,431 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/*
 * Euclidean norm of a vector: the square root of the sum of squares of
 * N elements starting at X with stride INCX.
 *
 *   N    ($16)  element count; when N <= 0 the routine skips the square
 *               root and leaves the cleared accumulator a0 ($f0) in place
 *   X    ($17)  vector pointer
 *   INCX ($18)  stride in elements on entry; rescaled below and then
 *               compared against SIZE and added directly to X
 *
 * Squares are formed with mult and summed with addt into four independent
 * accumulators a0..a3. Each "addt aK, tK, aK" folds in the product made
 * one step earlier, so t0..t3 start cleared and one product is still
 * pending in t0 when control reaches $L998.
 *
 * On EV4/EV5 builds the square root is taken by calling sqrt with the
 * argument in $f16, which is why a 16-byte frame holding $26 is set up.
 * Other builds use the sqrtt instruction and leave the result in a0.
 * NOTE(review): the EV4/EV5 path relies on sqrt returning in $f0 - confirm
 * against the platform calling standard.
 *
 * PROLOGUE, PROFCODE, EPILOGUE, LD, SIZE and SXADDQ come from common.h.
 */
/* Offset, in elements, of the prefetch-style load issued ahead of X. */
#define PREFETCH_SIZE 80
#define N $16
#define X $17
#define INCX $18
/* XX: snapshot of X taken before X is advanced inside an unrolled trip. */
#define XX $19
/* I: loop trip counter (shares $0 with the stride comparison result). */
#define I $0
/* a0..a3: partial sums of squares. */
#define a0 $f0
#define a1 $f1
#define a2 $f10
#define a3 $f11
/* t0..t3: squares waiting to be accumulated. */
#define t0 $f12
#define t1 $f13
#define t2 $f14
#define t3 $f15
/* x0..x7: values loaded from the vector. */
#define x0 $f16
#define x1 $f17
#define x2 $f18
#define x3 $f19
#define x4 $f20
#define x5 $f21
#define x6 $f22
#define x7 $f23
PROLOGUE
#if defined(EV4) || defined(EV5)
/* Non-leaf variant: set up gp, fetch the address of sqrt and save the */
/* return address so that sqrt can be called at $L998. */
.frame $30,16,$26,0
.mask 0x4000000,-16
ldah $29, 0($27) !gpdisp!1
lda $29, 0($29) !gpdisp!1
lda $sp, -16($sp)
ldq $27, sqrt($29) !literal!2
stq $26, 0($sp)
PROFCODE
.prologue 1
#else
PROFCODE
#endif
fclr a0
/* Rescale INCX; afterwards it is compared with SIZE and added to X. */
/* NOTE(review): presumably INCX * SIZE + 0, i.e. a byte stride - */
/* confirm against the SXADDQ definition in common.h. */
SXADDQ INCX, 0, INCX
fclr a1
/* N <= 0: leave with a0 still cleared, skipping the square root. */
ble N, $L999
fclr a2
/* $0 = (stride == SIZE); any other stride uses the path at $L20. */
cmpeq INCX, SIZE, $0
fclr a3
beq $0, $L20
/* Contiguous path: 16 elements per trip. */
fclr t0
sra N, 4, I
fclr t1
ble I, $L15
fclr t2
LD x0, 0 * SIZE(X)
fclr t3
LD x1, 1 * SIZE(X)
LD x2, 2 * SIZE(X)
LD x3, 3 * SIZE(X)
LD x4, 4 * SIZE(X)
LD x5, 5 * SIZE(X)
LD x6, 6 * SIZE(X)
LD x7, 7 * SIZE(X)
lda I, -1(I)
/* Only one 16-element group: skip the steady-state loop and drain. */
ble I, $L12
.align 4
$L11:
/* Steady state: square elements 0..15 of the current group while */
/* loading elements 8..23. */
addt a0, t0, a0
/* Load into $31: value discarded, issued only to touch memory ahead. */
ldl $31, (PREFETCH_SIZE) * SIZE(X)
mult x0, x0, t0
LD x0, 8 * SIZE(X)
addt a1, t1, a1
/* Remember the group base; X itself is advanced part-way through. */
mov X, XX
mult x1, x1, t1
LD x1, 9 * SIZE(X)
addt a2, t2, a2
unop
mult x2, x2, t2
LD x2, 10 * SIZE(X)
addt a3, t3, a3
unop
mult x3, x3, t3
LD x3, 11 * SIZE(X)
addt a0, t0, a0
unop
mult x4, x4, t0
LD x4, 12 * SIZE(X)
addt a1, t1, a1
unop
mult x5, x5, t1
LD x5, 13 * SIZE(X)
addt a2, t2, a2
unop
mult x6, x6, t2
LD x6, 14 * SIZE(X)
addt a3, t3, a3
unop
mult x7, x7, t3
LD x7, 15 * SIZE(X)
addt a0, t0, a0
unop
mult x0, x0, t0
LD x0, 16 * SIZE(X)
addt a1, t1, a1
/* X moves to the next group here; the remaining loads of this trip */
/* are addressed through the saved base XX. */
lda X, 16 * SIZE(X)
mult x1, x1, t1
LD x1, 17 * SIZE(XX)
addt a2, t2, a2
unop
mult x2, x2, t2
LD x2, 18 * SIZE(XX)
addt a3, t3, a3
unop
mult x3, x3, t3
LD x3, 19 * SIZE(XX)
addt a0, t0, a0
unop
mult x4, x4, t0
LD x4, 20 * SIZE(XX)
addt a1, t1, a1
lda I, -1(I)
mult x5, x5, t1
LD x5, 21 * SIZE(XX)
addt a2, t2, a2
unop
mult x6, x6, t2
LD x6, 22 * SIZE(XX)
addt a3, t3, a3
mult x7, x7, t3
LD x7, 23 * SIZE(XX)
bgt I, $L11
.align 4
$L12:
/* Pipeline drain: load elements 8..15 of the last group and square */
/* all 16. The final product in t0 is left for $L16 or $L998. */
addt a0, t0, a0
mov X, XX
mult x0, x0, t0
LD x0, 8 * SIZE(X)
addt a1, t1, a1
unop
mult x1, x1, t1
LD x1, 9 * SIZE(X)
addt a2, t2, a2
unop
mult x2, x2, t2
LD x2, 10 * SIZE(X)
addt a3, t3, a3
unop
mult x3, x3, t3
LD x3, 11 * SIZE(X)
addt a0, t0, a0
unop
mult x4, x4, t0
LD x4, 12 * SIZE(XX)
addt a1, t1, a1
unop
mult x5, x5, t1
LD x5, 13 * SIZE(XX)
addt a2, t2, a2
unop
mult x6, x6, t2
LD x6, 14 * SIZE(XX)
addt a3, t3, a3
lda X, 16 * SIZE(X)
mult x7, x7, t3
LD x7, 15 * SIZE(XX)
addt a0, t0, a0
mult x0, x0, t0
addt a1, t1, a1
mult x1, x1, t1
addt a2, t2, a2
mult x2, x2, t2
addt a3, t3, a3
mult x3, x3, t3
addt a0, t0, a0
mult x4, x4, t0
addt a1, t1, a1
mult x5, x5, t1
addt a2, t2, a2
mult x6, x6, t2
addt a3, t3, a3
mult x7, x7, t3
addt a1, t1, a1
addt a2, t2, a2
addt a3, t3, a3
.align 4
$L15:
/* Contiguous remainder count. */
and N, 15, I
ble I, $L998
.align 4
$L16:
/* Contiguous remainder: one element per trip, accumulated in a0. */
LD x0, 0 * SIZE(X)
lda X, 1 * SIZE(X)
addt a0, t0, a0
mult x0, x0, t0
lda I, -1(I)
bgt I, $L16
/* Plain unconditional jump to the final reduction. BR is used rather */
/* than BSR: both discard the return address into $31, but BSR carries */
/* the subroutine-call prediction hint, which does not belong on a jump */
/* that is never returned from. */
br $31, $L998
.align 4
$L20:
/* Strided path: 8 elements per trip, X advanced by INCX per element. */
fclr t0
sra N, 3, I
fclr t1
ble I, $L25
fclr t2
fclr t3
/* Pipeline fill: load the first 7 elements; the 8th is loaded at the */
/* top of $L21 or $L22. */
LD x0, 0 * SIZE(X)
addq X, INCX, X
LD x1, 0 * SIZE(X)
addq X, INCX, X
LD x2, 0 * SIZE(X)
addq X, INCX, X
LD x3, 0 * SIZE(X)
addq X, INCX, X
LD x4, 0 * SIZE(X)
addq X, INCX, X
LD x5, 0 * SIZE(X)
addq X, INCX, X
LD x6, 0 * SIZE(X)
addq X, INCX, X
lda I, -1(I)
ble I, $L22
.align 4
$L21:
/* Steady state: square 8 elements while loading the 8th of this */
/* group and the first 7 of the next. */
addt a0, t0, a0
LD x7, 0 * SIZE(X)
mult x0, x0, t0
addq X, INCX, X
addt a1, t1, a1
LD x0, 0 * SIZE(X)
mult x1, x1, t1
addq X, INCX, X
addt a2, t2, a2
LD x1, 0 * SIZE(X)
mult x2, x2, t2
addq X, INCX, X
addt a3, t3, a3
LD x2, 0 * SIZE(X)
mult x3, x3, t3
addq X, INCX, X
addt a0, t0, a0
LD x3, 0 * SIZE(X)
mult x4, x4, t0
addq X, INCX, X
addt a1, t1, a1
LD x4, 0 * SIZE(X)
mult x5, x5, t1
addq X, INCX, X
addt a2, t2, a2
LD x5, 0 * SIZE(X)
mult x6, x6, t2
addq X, INCX, X
addt a3, t3, a3
LD x6, 0 * SIZE(X)
mult x7, x7, t3
addq X, INCX, X
lda I, -1(I)
bgt I, $L21
.align 4
$L22:
/* Pipeline drain: load the last element of the final group and */
/* square all 8. The final product in t0 is left pending. */
addt a0, t0, a0
LD x7, 0 * SIZE(X)
mult x0, x0, t0
addq X, INCX, X
addt a1, t1, a1
unop
mult x1, x1, t1
unop
addt a2, t2, a2
mult x2, x2, t2
addt a3, t3, a3
mult x3, x3, t3
addt a0, t0, a0
mult x4, x4, t0
addt a1, t1, a1
mult x5, x5, t1
addt a2, t2, a2
mult x6, x6, t2
addt a3, t3, a3
mult x7, x7, t3
addt a1, t1, a1
addt a2, t2, a2
addt a3, t3, a3
.align 4
$L25:
/* Strided remainder count. */
and N, 7, I
ble I, $L998
.align 4
$L26:
/* Strided remainder: one element per trip, accumulated in a0. */
LD x0, 0 * SIZE(X)
addq X, INCX, X
addt a0, t0, a0
mult x0, x0, t0
lda I, -1(I)
bgt I, $L26
.align 4
$L998:
/* Fold in the last pending product, then reduce the four partial sums. */
addt a0, t0, a0
addt a0, a1, a0
addt a2, a3, a2
#if defined(EV4) || defined(EV5)
/* Total goes to $f16 as the argument of sqrt; gp is rebuilt from the */
/* return address after the call. */
addt a0, a2, $f16
jsr $26, ($27), sqrt !lituse_jsr!2
ldah $29, 0($26) !gpdisp!3
lda $29, 0($29) !gpdisp!3
#else
addt a0, a2, a0
sqrtt a0, a0
#endif
.align 4
$L999:
#if defined(EV4) || defined(EV5)
/* Restore the return address and release the 16-byte frame. */
ldq $26, 0($sp)
lda $sp, 16($sp)
#endif
ret
EPILOGUE

View File

@@ -0,0 +1,45 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
/*
 * Static buffer pool. Only when ALLOC_STATIC is defined, reserve the
 * common symbol alloc_area with room for NUM_BUFFERS blocks of
 * BUFFER_SIZE bytes each; the third .comm operand (16384) is the
 * requested alignment. NUM_BUFFERS and BUFFER_SIZE come from common.h.
 */
#ifdef ALLOC_STATIC
.align 8
.comm alloc_area, (NUM_BUFFERS * BUFFER_SIZE), 16384
#endif

249
kernel/alpha/swap.S Normal file
View File

@@ -0,0 +1,249 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/*
 * Exchange the contents of two vectors element by element.
 *
 * Register use after the argument shuffle below:
 *   $16  element count; nothing is exchanged when it is <= 0
 *   $17  pointer into the first vector  (copied from $20)
 *   $18  stride of the first vector     (copied from $21)
 *   $19  pointer into the second vector (loaded from 0($sp))
 *   $20  stride of the second vector    (loaded from 8($sp) with ldl)
 *   $21  number of 8-element groups, $22 leftover elements (count & 7)
 * Every exit path clears $0 before returning.
 *
 * PROLOGUE, PROFCODE, EPILOGUE, LD, ST, SIZE and SXADDQ come from common.h.
 * NOTE(review): SXADDQ is presumably a scaled add (stride * SIZE + base);
 * confirm against common.h.
 */
PROLOGUE
PROFCODE
.frame $sp, 0, $26, 0
mov $20, $17
mov $21, $18
ldq $19, 0($sp)
ldl $20, 8($sp)
#ifndef PROFILE
.prologue 0
#else
.prologue 1
#endif
/* $1 = (stride1 - 1) | (stride2 - 1): zero only when both strides are 1. */
subl $18, 1, $1
subl $20, 1, $2
/* The branch target for a non-positive count is $SubEnd. */
ble $16, $SubEnd # if n <= 0 goto $End
or $1, $2, $1
sra $16, 3, $21
and $16, 7, $22
/* Any non-unit stride takes the generic path. */
bne $1, $Sub
ble $21, $MainRemain
.align 4
$MainLoop:
/* Unit-stride path: read 8 elements from each vector, then write each */
/* set to the other vector. */
LD $f10, 0*SIZE($19)
LD $f11, 1*SIZE($19)
LD $f12, 2*SIZE($19)
LD $f13, 3*SIZE($19)
LD $f14, 4*SIZE($19)
LD $f15, 5*SIZE($19)
LD $f16, 6*SIZE($19)
LD $f17, 7*SIZE($19)
LD $f20, 0*SIZE($17)
LD $f21, 1*SIZE($17)
LD $f22, 2*SIZE($17)
LD $f23, 3*SIZE($17)
LD $f24, 4*SIZE($17)
LD $f25, 5*SIZE($17)
LD $f26, 6*SIZE($17)
LD $f27, 7*SIZE($17)
/* Loads into $f31: values discarded, issued only to touch both vectors */
/* 32 elements ahead. */
lds $f31, 32*SIZE($17)
unop
lds $f31, 32*SIZE($19)
subl $21, 1, $21
ST $f10, 0*SIZE($17)
ST $f11, 1*SIZE($17)
ST $f12, 2*SIZE($17)
ST $f13, 3*SIZE($17)
ST $f14, 4*SIZE($17)
ST $f15, 5*SIZE($17)
ST $f16, 6*SIZE($17)
ST $f17, 7*SIZE($17)
ST $f20, 0*SIZE($19)
ST $f21, 1*SIZE($19)
ST $f22, 2*SIZE($19)
ST $f23, 3*SIZE($19)
ST $f24, 4*SIZE($19)
ST $f25, 5*SIZE($19)
ST $f26, 6*SIZE($19)
ST $f27, 7*SIZE($19)
lda $17, 8*SIZE($17)
lda $19, 8*SIZE($19)
bgt $21, $MainLoop
.align 4
$MainRemain:
ble $22, $MainEnd
.align 4
$MainRemainLoop:
/* Unit-stride remainder: one element per trip. The pointers are */
/* advanced before the stores, hence the -1 offsets. */
LD $f10, 0*SIZE($19)
LD $f20, 0*SIZE($17)
lda $17, 1*SIZE($17)
lda $19, 1*SIZE($19)
subl $22, 1, $22
ST $f10, -1*SIZE($17)
ST $f20, -1*SIZE($19)
bgt $22, $MainRemainLoop
.align 4
$MainEnd:
clr $0
ret
.align 4
$Sub:
/* Generic-stride path. $17/$19 are the load pointers; $23/$24 are */
/* store pointers that follow them through the same two vectors. */
mov $17, $23
mov $19, $24
ble $21, $SubRemain
.align 4
$SubLoop:
/* Gather 8 elements from the second vector. */
LD $f10, 0*SIZE($19)
SXADDQ $20, $19, $19
LD $f11, 0*SIZE($19)
SXADDQ $20, $19, $19
LD $f12, 0*SIZE($19)
SXADDQ $20, $19, $19
LD $f13, 0*SIZE($19)
SXADDQ $20, $19, $19
LD $f14, 0*SIZE($19)
SXADDQ $20, $19, $19
LD $f15, 0*SIZE($19)
SXADDQ $20, $19, $19
LD $f16, 0*SIZE($19)
SXADDQ $20, $19, $19
LD $f17, 0*SIZE($19)
SXADDQ $20, $19, $19
/* Gather 8 elements from the first vector. */
LD $f20, 0*SIZE($17)
SXADDQ $18, $17, $17
LD $f21, 0*SIZE($17)
SXADDQ $18, $17, $17
LD $f22, 0*SIZE($17)
SXADDQ $18, $17, $17
LD $f23, 0*SIZE($17)
SXADDQ $18, $17, $17
LD $f24, 0*SIZE($17)
SXADDQ $18, $17, $17
LD $f25, 0*SIZE($17)
SXADDQ $18, $17, $17
LD $f26, 0*SIZE($17)
SXADDQ $18, $17, $17
LD $f27, 0*SIZE($17)
SXADDQ $18, $17, $17
/* Scatter the second vector's values into the first. */
ST $f10, 0*SIZE($23)
SXADDQ $18, $23, $23
ST $f11, 0*SIZE($23)
SXADDQ $18, $23, $23
ST $f12, 0*SIZE($23)
SXADDQ $18, $23, $23
ST $f13, 0*SIZE($23)
SXADDQ $18, $23, $23
ST $f14, 0*SIZE($23)
SXADDQ $18, $23, $23
ST $f15, 0*SIZE($23)
SXADDQ $18, $23, $23
ST $f16, 0*SIZE($23)
SXADDQ $18, $23, $23
ST $f17, 0*SIZE($23)
SXADDQ $18, $23, $23
/* Scatter the first vector's values into the second. */
ST $f20, 0*SIZE($24)
SXADDQ $20, $24, $24
ST $f21, 0*SIZE($24)
SXADDQ $20, $24, $24
ST $f22, 0*SIZE($24)
SXADDQ $20, $24, $24
ST $f23, 0*SIZE($24)
SXADDQ $20, $24, $24
ST $f24, 0*SIZE($24)
SXADDQ $20, $24, $24
ST $f25, 0*SIZE($24)
SXADDQ $20, $24, $24
ST $f26, 0*SIZE($24)
SXADDQ $20, $24, $24
ST $f27, 0*SIZE($24)
SXADDQ $20, $24, $24
subl $21, 1, $21
bgt $21, $SubLoop
.align 4
$SubRemain:
ble $22, $SubEnd
.align 4
$SubRemainLoop:
/* Generic-stride remainder: one element per trip. Loads and stores */
/* both go through $17/$19, which sit at the first unprocessed element. */
LD $f10, 0*SIZE($19)
LD $f20, 0*SIZE($17)
subl $22, 1, $22
ST $f10, 0*SIZE($17)
ST $f20, 0*SIZE($19)
SXADDQ $18, $17, $17
SXADDQ $20, $19, $19
bgt $22, $SubRemainLoop
.align 4
$SubEnd:
clr $0
ret
EPILOGUE

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

301
kernel/alpha/zamax.S Normal file
View File

@@ -0,0 +1,301 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/*
 * Extreme value of |re| + |im| over a vector of (re, im) pairs.
 * The default build keeps the largest value; a build with USE_MIN
 * defined keeps the smallest.
 *
 *   N    ($16)  number of pairs
 *   X    ($17)  vector pointer
 *   INCX ($18)  stride in pairs
 *
 * The result is produced in $f0. When N <= 0 or INCX <= 0 the routine
 * returns with $f0 cleared. $f2..$f9 are saved on entry and restored at
 * $End.
 *
 * PROLOGUE, PROFCODE, EPILOGUE, LD, SIZE and SXADDQ come from common.h.
 */
#define N $16
#define X $17
#define INCX $18
/*
 * CMPLT(a, b), d expands to a cmptlt writing d. In the default build d is
 * non-zero when a < b, so the following fcmovne replaces the running
 * value with a larger candidate; with USE_MIN the operands are swapped
 * and a smaller candidate wins instead.
 */
#ifndef USE_MIN
#define CMPLT(a, b) cmptlt a, b
#else
#define CMPLT(a, b) cmptlt b, a
#endif
/* Room for the eight saved registers $f2..$f9. */
#define STACKSIZE 8 * 8
PROLOGUE
PROFCODE
.frame $sp, STACKSIZE, $26, 0
lda $sp, -STACKSIZE($sp)
/* Save $f2..$f9, interleaved with the argument checks. */
stt $f2, 0($sp)
fclr $f16
/* $2 = (N > 0) */
cmplt $31, N, $2
stt $f3, 8($sp)
fclr $f17
/* $3 = (INCX > 0) */
cmplt $31, INCX, $3
unop
stt $f4, 16($sp)
fclr $f18
/* Rescale INCX; it is doubled further down and then added to X. */
/* NOTE(review): presumably INCX * SIZE - confirm against common.h. */
SXADDQ INCX, $31, INCX
unop
stt $f5, 24($sp)
fclr $f19
and $2, $3, $0
unop
stt $f6, 32($sp)
unop
stt $f7, 40($sp)
stt $f8, 48($sp)
stt $f9, 56($sp)
fclr $f0
beq $0, $End # if (n <= 0) or (incx <= 0) return
.align 4
/* Seed the running value with the first pair. X is not advanced yet, */
/* so a short vector is handled entirely by the loop at $L16. */
LD $f20, 0 * SIZE(X)
LD $f21, 1 * SIZE(X)
/* $1 = number of 4-pair groups. */
sra N, 2, $1
/* Two scalars per pair: double the stride. */
addq INCX, INCX, INCX
fabs $f20, $f20
fabs $f21, $f21
addt $f20, $f21, $f0
ble $1, $L15
.align 4
/* Pipeline fill, stage 1: finish loading the first 4 pairs, copy the */
/* seed into the four running values $f0..$f3, and take absolute */
/* values into $f8..$f15. */
lda $1, -1($1)
unop
addq X, INCX, X
unop
LD $f22, 0 * SIZE(X)
fmov $f0, $f1
LD $f23, 1 * SIZE(X)
addq X, INCX, X
LD $f24, 0 * SIZE(X)
fmov $f0, $f2
LD $f25, 1 * SIZE(X)
addq X, INCX, X
LD $f26, 0 * SIZE(X)
fmov $f0, $f3
LD $f27, 1 * SIZE(X)
addq X, INCX, X
fabs $f20, $f8
fabs $f21, $f9
fabs $f22, $f10
fabs $f23, $f11
fabs $f24, $f12
fabs $f25, $f13
fabs $f26, $f14
fabs $f27, $f15
/* Only one group: go straight to the final compare stage. */
ble $1, $L14
.align 4
/* Pipeline fill, stage 2: load the second group of 4 pairs. */
LD $f20, 0 * SIZE(X)
LD $f21, 1 * SIZE(X)
lda $1, -1($1)
addq X, INCX, X
LD $f22, 0 * SIZE(X)
LD $f23, 1 * SIZE(X)
unop
addq X, INCX, X
LD $f24, 0 * SIZE(X)
LD $f25, 1 * SIZE(X)
unop
addq X, INCX, X
LD $f26, 0 * SIZE(X)
LD $f27, 1 * SIZE(X)
addq X, INCX, X
ble $1, $L13
.align 4
$L12:
/* Steady state, three groups in flight: sums of the oldest group go */
/* to $f16..$f19 and are compared against $f0..$f3, absolute values of */
/* the middle group go to $f8..$f15, and the newest group is loaded */
/* into $f20..$f27. */
addt $f8, $f9, $f16
unop
fabs $f20, $f8
/* Load into $31: value discarded, issued only to touch memory ahead. */
ldl $31, 64 * SIZE(X)
addt $f10, $f11, $f17
unop
fabs $f21, $f9
LD $f20, 0 * SIZE(X)
addt $f12, $f13, $f18
LD $f21, 1 * SIZE(X)
fabs $f22, $f10
addq X, INCX, X
addt $f14, $f15, $f19
LD $f22, 0 * SIZE(X)
fabs $f23, $f11
unop
CMPLT($f0, $f16), $f4
LD $f23, 1 * SIZE(X)
fabs $f24, $f12
addq X, INCX, X
CMPLT($f1, $f17), $f5
LD $f24, 0 * SIZE(X)
fabs $f25, $f13
unop
CMPLT($f2, $f18), $f6
LD $f25, 1 * SIZE(X)
fabs $f26, $f14
addq X, INCX, X
CMPLT($f3, $f19), $f7
LD $f26, 0 * SIZE(X)
fabs $f27, $f15
unop
fcmovne $f4, $f16, $f0
LD $f27, 1 * SIZE(X)
addq X, INCX, X
lda $1, -1($1) # i --
fcmovne $f5, $f17, $f1
fcmovne $f6, $f18, $f2
fcmovne $f7, $f19, $f3
bgt $1,$L12
.align 4
$L13:
/* Drain, stage 1: no more loads; compare the older group and take */
/* absolute values of the last loaded group. */
addt $f8, $f9, $f16
fabs $f20, $f8
addt $f10, $f11, $f17
fabs $f21, $f9
addt $f12, $f13, $f18
fabs $f22, $f10
addt $f14, $f15, $f19
fabs $f23, $f11
CMPLT($f0, $f16), $f4
fabs $f24, $f12
CMPLT($f1, $f17), $f5
fabs $f25, $f13
CMPLT($f2, $f18), $f6
fabs $f26, $f14
CMPLT($f3, $f19), $f7
fabs $f27, $f15
fcmovne $f4, $f16, $f0
fcmovne $f5, $f17, $f1
fcmovne $f6, $f18, $f2
fcmovne $f7, $f19, $f3
.align 4
$L14:
/* Drain, stage 2: compare the last group, then reduce the four */
/* running values $f0..$f3 pairwise into $f0. */
addt $f8, $f9, $f16
addt $f10, $f11, $f17
addt $f12, $f13, $f18
addt $f14, $f15, $f19
CMPLT($f0, $f16), $f4
CMPLT($f1, $f17), $f5
CMPLT($f2, $f18), $f6
CMPLT($f3, $f19), $f7
fcmovne $f4, $f16, $f0
fcmovne $f5, $f17, $f1
fcmovne $f6, $f18, $f2
fcmovne $f7, $f19, $f3
CMPLT($f0, $f1), $f16
CMPLT($f2, $f3), $f17
fcmovne $f16, $f1, $f0
fcmovne $f17, $f3, $f2
CMPLT($f0, $f2), $f16
fcmovne $f16, $f2, $f0
.align 4
$L15:
/* Remainder count: pairs not covered by the 4-wide groups. */
and N, 3, $1
unop
unop
ble $1, $End
.align 4
$L16:
/* Remainder: one pair per trip, compared directly against $f0. */
LD $f20, 0 * SIZE(X)
LD $f21, 1 * SIZE(X)
unop
addq X, INCX, X
fabs $f20, $f29
fabs $f21, $f30
addt $f29, $f30, $f29
CMPLT($f0, $f29), $f16
fcmovne $f16, $f29, $f0
lda $1, -1($1) # i --
bgt $1, $L16
.align 4
$End:
/* Restore the saved registers and release the frame. */
ldt $f2, 0($sp)
ldt $f3, 8($sp)
ldt $f4, 16($sp)
ldt $f5, 24($sp)
ldt $f6, 32($sp)
ldt $f7, 40($sp)
ldt $f8, 48($sp)
ldt $f9, 56($sp)
lda $sp, STACKSIZE($sp)
ret
EPILOGUE

208
kernel/alpha/zasum.S Normal file
View File

@@ -0,0 +1,208 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/*
 * Sum of |re| + |im| over a vector of (re, im) pairs.
 *
 *   N    ($16)  number of pairs; when N <= 0 the cleared sum is returned
 *   X    ($17)  vector pointer
 *   INCX ($18)  stride in pairs; doubled on entry because each pair
 *               holds two scalars
 *
 * The result is produced in s0 ($f0). Absolute values are formed in
 * t0..t3 and added into s0..s3 one step later, so t0 and t1 may still
 * hold pending terms at $L999, where they are folded in.
 *
 * PROLOGUE, PROFCODE, EPILOGUE, LD, ADD, SIZE and SXADDQ come from
 * common.h.
 * NOTE(review): SXADDQ is presumably a scaled add (INCX * SIZE + base);
 * confirm against common.h.
 */
/* Offset, in elements, of the prefetch-style load issued ahead of X. */
#define PREFETCHSIZE 88
#define N $16
#define X $17
#define INCX $18
/* I: loop trip counter. */
#define I $19
/* s0..s3: partial sums. */
#define s0 $f0
#define s1 $f1
#define s2 $f10
#define s3 $f11
/* a0..a7: scalars loaded from the vector (four pairs). */
#define a0 $f12
#define a1 $f13
#define a2 $f14
#define a3 $f15
#define a4 $f16
#define a5 $f17
#define a6 $f18
#define a7 $f19
/* t0..t3: absolute values waiting to be accumulated. */
#define t0 $f20
#define t1 $f21
#define t2 $f22
#define t3 $f23
PROLOGUE
PROFCODE
fclr s0
unop
fclr t0
/* Two scalars per pair: double the stride. */
addq INCX, INCX, INCX
fclr s1
unop
fclr t1
/* N <= 0: $L999 adds the cleared registers and returns. */
ble N, $L999
fclr s2
/* I = number of 4-pair groups. */
sra N, 2, I
fclr s3
ble I, $L15
/* Pipeline fill: load the first three pairs; the fourth is loaded at */
/* the top of $L12 or $L13. */
LD a0, 0 * SIZE(X)
fclr t2
LD a1, 1 * SIZE(X)
SXADDQ INCX, X, X
LD a2, 0 * SIZE(X)
fclr t3
LD a3, 1 * SIZE(X)
SXADDQ INCX, X, X
LD a4, 0 * SIZE(X)
LD a5, 1 * SIZE(X)
SXADDQ INCX, X, X
lda I, -1(I)
ble I, $L13
.align 4
$L12:
/* Steady state: accumulate the previous absolute values, take those */
/* of the current four pairs, and load the fourth pair of this group */
/* plus the first three of the next. */
ADD s0, t0, s0
/* Load into $31: value discarded, issued only to touch memory ahead. */
ldl $31, PREFETCHSIZE * SIZE(X)
fabs a0, t0
lda I, -1(I)
ADD s1, t1, s1
LD a6, 0 * SIZE(X)
fabs a1, t1
unop
ADD s2, t2, s2
LD a7, 1 * SIZE(X)
fabs a2, t2
SXADDQ INCX, X, X
ADD s3, t3, s3
LD a0, 0 * SIZE(X)
fabs a3, t3
unop
ADD s0, t0, s0
LD a1, 1 * SIZE(X)
fabs a4, t0
SXADDQ INCX, X, X
ADD s1, t1, s1
LD a2, 0 * SIZE(X)
fabs a5, t1
unop
ADD s2, t2, s2
LD a3, 1 * SIZE(X)
fabs a6, t2
SXADDQ INCX, X, X
ADD s3, t3, s3
LD a4, 0 * SIZE(X)
fabs a7, t3
unop
LD a5, 1 * SIZE(X)
unop
SXADDQ INCX, X, X
bne I, $L12
.align 4
$L13:
/* Pipeline drain: load the last pair of the final group and process */
/* all four pairs. t2 and t3 are folded in here; t0 and t1 stay */
/* pending for $L17 or $L999. */
ADD s0, t0, s0
LD a6, 0 * SIZE(X)
fabs a0, t0
ADD s1, t1, s1
LD a7, 1 * SIZE(X)
fabs a1, t1
SXADDQ INCX, X, X
ADD s2, t2, s2
fabs a2, t2
ADD s3, t3, s3
fabs a3, t3
ADD s0, t0, s0
fabs a4, t0
ADD s1, t1, s1
fabs a5, t1
ADD s2, t2, s2
fabs a6, t2
ADD s3, t3, s3
fabs a7, t3
ADD s2, t2, s2
ADD s3, t3, s3
.align 4
$L15:
/* Merge s2/s3 into s0/s1 and compute the remainder count. */
ADD s0, s2, s0
and N, 3, I
ADD s1, s3, s1
ble I, $L999
.align 4
$L17:
/* Remainder: one pair per trip, using only s0/s1 and t0/t1. */
ADD s0, t0, s0
LD a0, 0 * SIZE(X)
fabs a0, t0
lda I, -1(I)
ADD s1, t1, s1
LD a1, 1 * SIZE(X)
fabs a1, t1
SXADDQ INCX, X, X
bne I, $L17
.align 4
$L999:
/* Fold in the pending terms and combine the two remaining sums. */
ADD s0, t0, s0
ADD s1, t1, s1
ADD s0, s1, s0
ret
EPILOGUE

611
kernel/alpha/zaxpy.S Normal file
View File

@@ -0,0 +1,611 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Prefetch distance (in elements, scaled by SIZE) used inside $MainLoop. */
#define PREFETCHSIZE 40
/* ADD1 combines the two products that form the real part of each complex
   product, ADD2 the two that form the imaginary part. Building with CONJ
   swaps which of the two is a subtraction. */
#ifndef CONJ
#define ADD1 SUB
#define ADD2 ADD
#else
#define ADD1 ADD
#define ADD2 SUB
#endif
/* Complex update kernel. For each of $16 elements, the (re, im) pair
   addressed through $20 is incremented by the scalar ($f19, $f20) times the
   (re, im) pair addressed through $18. Two code paths exist: a contiguous
   path taken when both strides equal 1, and a strided path at $Sub. */
PROLOGUE
PROFCODE
/* NOTE(review): .frame declares 16 bytes but 64 are allocated below - confirm. */
.frame $sp, 16, $26, 0
/* $19 = stride of the source vector, read from the caller's stack */
ldl $19, 0($sp)
/* The scalar is moved to $f29/$f30 because $f19/$f20 are overwritten as
   temporaries by the loops. */
fmov $f19, $f29
/* $20 = destination vector pointer */
ldq $20, 8($sp)
fmov $f20, $f30
/* $18 = source vector pointer (arrives in $21, which is reused just below) */
mov $21, $18
/* $21 = stride of the destination vector */
ldl $21, 16($sp)
/* Allocate the save area; $f2-$f8 are spilled here and reloaded before ret. */
lda $sp, -64($sp)
nop
stt $f2, 0($sp)
/* $1 = (source stride == 1) */
cmpeq $19, 1, $1
stt $f3, 8($sp)
/* $2 = (destination stride == 1) */
cmpeq $21, 1, $2
stt $f4, 16($sp)
/* $5 = count mod 4: elements left over after the 4-way unrolled loop */
and $16, 3, $5
stt $f5, 24($sp)
stt $f6, 32($sp)
stt $f7, 40($sp)
stt $f8, 48($sp)
#ifndef PROFILE
.prologue 0
#else
.prologue 1
#endif
/* $1 = both strides are 1 */
and $1, $2, $1
/* nothing to do for a non-positive count */
ble $16, $End
/* $4 = number of 4-element groups */
sra $16, 2, $4
beq $1, $Sub
ble $4, $Remain
subq $4, 1, $4
/* Preload the first group: four source pairs into $f0-$f7 ... */
LD $f0, 0*SIZE($18)
LD $f1, 1*SIZE($18)
LD $f2, 2*SIZE($18)
LD $f3, 3*SIZE($18)
LD $f4, 4*SIZE($18)
LD $f5, 5*SIZE($18)
LD $f6, 6*SIZE($18)
LD $f7, 7*SIZE($18)
/* ... and four destination pairs into $f8, $f28, $f10-$f15. */
LD $f8, 0*SIZE($20)
LD $f28, 1*SIZE($20)
LD $f10, 2*SIZE($20)
LD $f11, 3*SIZE($20)
LD $f12, 4*SIZE($20)
LD $f13, 5*SIZE($20)
LD $f14, 6*SIZE($20)
LD $f15, 7*SIZE($20)
addq $18, 8*SIZE, $18
/* With a single group, skip the pipelined loop and just drain it. */
ble $4, $MainLoopEnd
.align 4
/* Contiguous main loop, software pipelined. Each pass finishes the four
   complex elements already held in registers, stores them, and loads the
   next four source and destination pairs while the arithmetic is in flight.
   $4 counts the remaining passes. */
$MainLoop:
/* Loads whose target is $f31 or $31 discard their data; presumably they are
   issued only to pull the lines into cache ahead of use. */
ldt $f31, PREFETCHSIZE * SIZE($20)
ldl $31, PREFETCHSIZE * SIZE($18)
MUL $f29, $f0, $f20
LD $f31, 9*SIZE($18)
MUL $f30, $f1, $f21
unop
MUL $f30, $f0, $f22
LD $f0, 0*SIZE($18)
MUL $f29, $f1, $f23
LD $f1, 1*SIZE($18)
MUL $f29, $f2, $f24
unop
MUL $f30, $f3, $f25
nop
MUL $f30, $f2, $f26
LD $f2, 2*SIZE($18)
MUL $f29, $f3, $f27
LD $f3, 3*SIZE($18)
ADD1 $f20, $f21, $f16
MUL $f29, $f4, $f20
ADD2 $f22, $f23, $f17
MUL $f30, $f5, $f21
ADD1 $f24, $f25, $f18
unop
MUL $f30, $f4, $f22
LD $f4, 4*SIZE($18)
ADD2 $f26, $f27, $f19
/* The destination pointer is advanced here, so the stores further down use
   negative offsets to reach the group being completed. */
addq $20, 8*SIZE, $20
MUL $f29, $f5, $f23
LD $f5, 5*SIZE($18)
ADD $f16, $f8, $f16
LD $f8, 0*SIZE($20)
MUL $f29, $f6, $f24
unop
ADD $f17, $f28, $f17
LD $f28, 1*SIZE($20)
MUL $f30, $f7, $f25
unop
ADD $f18, $f10, $f18
LD $f10, 2*SIZE($20)
MUL $f30, $f6, $f26
LD $f6, 6*SIZE($18)
ADD $f19, $f11, $f19
LD $f11, 3*SIZE($20)
MUL $f29, $f7, $f27
LD $f7, 7*SIZE($18)
ST $f16,-8*SIZE($20)
ADD1 $f20, $f21, $f16
ST $f17,-7*SIZE($20)
ADD2 $f22, $f23, $f17
ST $f18,-6*SIZE($20)
ADD1 $f24, $f25, $f18
ST $f19,-5*SIZE($20)
ADD2 $f26, $f27, $f19
ADD $f16, $f12, $f16
LD $f12, 4*SIZE($20)
ADD $f17, $f13, $f17
LD $f13, 5*SIZE($20)
ADD $f18, $f14, $f18
LD $f14, 6*SIZE($20)
ADD $f19, $f15, $f19
LD $f15, 7*SIZE($20)
ST $f16,-4*SIZE($20)
addq $18, 8*SIZE, $18
ST $f17,-3*SIZE($20)
subq $4, 1, $4
ST $f18,-2*SIZE($20)
nop
ST $f19,-1*SIZE($20)
bgt $4, $MainLoop
.align 4
/* Drain of the contiguous pipeline: same arithmetic as one loop pass for the
   last preloaded group, but with no further loads. */
$MainLoopEnd:
MUL $f29, $f0, $f20
MUL $f30, $f1, $f21
MUL $f30, $f0, $f22
MUL $f29, $f1, $f23
MUL $f29, $f2, $f24
MUL $f30, $f3, $f25
MUL $f30, $f2, $f26
MUL $f29, $f3, $f27
ADD1 $f20, $f21, $f16
MUL $f29, $f4, $f20
ADD2 $f22, $f23, $f17
MUL $f30, $f5, $f21
ADD1 $f24, $f25, $f18
MUL $f30, $f4, $f22
ADD2 $f26, $f27, $f19
MUL $f29, $f5, $f23
ADD $f16, $f8, $f16
MUL $f29, $f6, $f24
ADD $f17, $f28, $f17
MUL $f30, $f7, $f25
ADD $f18, $f10, $f18
MUL $f30, $f6, $f26
ADD $f19, $f11, $f19
MUL $f29, $f7, $f27
ST $f16, 0*SIZE($20)
ADD1 $f20, $f21, $f16
ST $f17, 1*SIZE($20)
ADD2 $f22, $f23, $f17
ST $f18, 2*SIZE($20)
ADD1 $f24, $f25, $f18
ST $f19, 3*SIZE($20)
ADD2 $f26, $f27, $f19
ADD $f16, $f12, $f16
ADD $f17, $f13, $f17
ADD $f18, $f14, $f18
ADD $f19, $f15, $f19
ST $f16, 4*SIZE($20)
ST $f17, 5*SIZE($20)
ST $f18, 6*SIZE($20)
ST $f19, 7*SIZE($20)
unop
addq $20, 8*SIZE, $20
unop
/* $5 = count mod 4; done if no leftover elements */
ble $5, $End
.align 4
/* Contiguous remainder: one complex element per pass, pipelined one element
   deep. $6 = leftover count - 1 is the number of passes through $RemainLoop. */
$Remain:
subq $5, 1, $6
ble $5, $End
LD $f0, 0*SIZE($18)
LD $f1, 1*SIZE($18)
LD $f8, 0*SIZE($20)
LD $f28, 1*SIZE($20)
addq $18, 2*SIZE, $18
ble $6, $RemainLoopEnd
.align 4
$RemainLoop:
MUL $f29, $f0, $f20
subq $6, 1, $6
MUL $f30, $f1, $f21
/* destination pointer advances early; the stores below use -2 and -1 */
addq $20, 2*SIZE, $20
MUL $f30, $f0, $f22
LD $f0, 0*SIZE($18)
MUL $f29, $f1, $f23
LD $f1, 1*SIZE($18)
ADD1 $f20, $f21, $f16
ADD2 $f22, $f23, $f17
ADD $f16, $f8, $f16
LD $f8, 0*SIZE($20)
ADD $f17, $f28, $f17
LD $f28, 1*SIZE($20)
ST $f16,-2*SIZE($20)
addq $18, 2*SIZE, $18
ST $f17,-1*SIZE($20)
bgt $6, $RemainLoop
.align 4
/* Final leftover element: compute and store, no further loads. */
$RemainLoopEnd:
MUL $f29, $f0, $f20
MUL $f30, $f1, $f21
MUL $f30, $f0, $f22
MUL $f29, $f1, $f23
ADD1 $f20, $f21, $f16
ADD2 $f22, $f23, $f17
ADD $f16, $f8, $f16
ADD $f17, $f28, $f17
ST $f16, 0*SIZE($20)
nop
ST $f17, 1*SIZE($20)
nop
.align 4
/* Exit for the contiguous path: reload $f2-$f8 from the save area, release
   the 64-byte area and return. */
$End:
ldt $f2, 0($sp)
ldt $f3, 8($sp)
ldt $f4, 16($sp)
ldt $f5, 24($sp)
ldt $f6, 32($sp)
ldt $f7, 40($sp)
ldt $f8, 48($sp)
lda $sp, 64($sp)
ret
.align 4
/* Strided path, entered when at least one stride differs from 1. The source
   is walked through $18 with stride $19; the destination is read through
   $24 and written through $20, both with stride $21, so that reads can run
   one group ahead of the writes. */
$Sub:
/* NOTE(review): $22 is computed here but not read again anywhere in this
   routine - looks like dead code, confirm before removing. */
SXSUBL $16, SIZE, $22
addq $22, $22, $22 # Complex
.align 4
/* Strides are doubled because each complex element spans two reals. */
addq $19, $19, $19 # Complex
addq $21, $21, $21 # Complex
ble $4, $SubRemain
/* Preload four source pairs, stepping the source pointer after each pair.
   SXADDQ presumably adds the first operand scaled by the element size to
   the second - macro is defined outside this file, verify in common.h. */
LD $f0, 0*SIZE($18)
LD $f1, 1*SIZE($18)
SXADDQ $19, $18, $18
LD $f2, 0*SIZE($18)
LD $f3, 1*SIZE($18)
SXADDQ $19, $18, $18
LD $f4, 0*SIZE($18)
LD $f5, 1*SIZE($18)
SXADDQ $19, $18, $18
LD $f6, 0*SIZE($18)
LD $f7, 1*SIZE($18)
SXADDQ $19, $18, $18
/* Preload four destination pairs; $24 becomes the read-ahead pointer while
   $20 stays at the first element to be stored. */
LD $f8, 0*SIZE($20)
LD $f28, 1*SIZE($20)
SXADDQ $21, $20, $24
LD $f10, 0*SIZE($24)
LD $f11, 1*SIZE($24)
SXADDQ $21, $24, $24
LD $f12, 0*SIZE($24)
LD $f13, 1*SIZE($24)
SXADDQ $21, $24, $24
LD $f14, 0*SIZE($24)
LD $f15, 1*SIZE($24)
SXADDQ $21, $24, $24
subq $4, 1, $4
ble $4, $SubMainLoopEnd
.align 4
/* Strided main loop, four complex elements per pass. Same arithmetic as the
   contiguous loop; the difference is that every pair is followed by an
   explicit pointer step: $18 for source loads, $24 for destination loads and
   $20 for destination stores. $4 counts the remaining passes. */
$SubMainLoop:
MUL $f29, $f0, $f20
unop
MUL $f30, $f1, $f21
unop
MUL $f30, $f0, $f22
LD $f0, 0*SIZE($18)
MUL $f29, $f1, $f23
LD $f1, 1*SIZE($18)
MUL $f29, $f2, $f24
SXADDQ $19, $18, $18
MUL $f30, $f3, $f25
unop
MUL $f30, $f2, $f26
LD $f2, 0*SIZE($18)
MUL $f29, $f3, $f27
LD $f3, 1*SIZE($18)
ADD1 $f20, $f21, $f16
SXADDQ $19, $18, $18
MUL $f29, $f4, $f20
unop
ADD2 $f22, $f23, $f17
unop
MUL $f30, $f5, $f21
unop
ADD1 $f24, $f25, $f18
unop
MUL $f30, $f4, $f22
LD $f4, 0*SIZE($18)
ADD2 $f26, $f27, $f19
unop
MUL $f29, $f5, $f23
LD $f5, 1*SIZE($18)
ADD $f16, $f8, $f16
LD $f8, 0*SIZE($24)
MUL $f29, $f6, $f24
SXADDQ $19, $18, $18
ADD $f17, $f28, $f17
LD $f28, 1*SIZE($24)
MUL $f30, $f7, $f25
SXADDQ $21, $24, $24
ADD $f18, $f10, $f18
LD $f10, 0*SIZE($24)
MUL $f30, $f6, $f26
LD $f6, 0*SIZE($18)
ADD $f19, $f11, $f19
LD $f11, 1*SIZE($24)
MUL $f29, $f7, $f27
LD $f7, 1*SIZE($18)
/* first two results of the group are written back */
ST $f16, 0*SIZE($20)
SXADDQ $19, $18, $18
ADD1 $f20, $f21, $f16
unop
ST $f17, 1*SIZE($20)
SXADDQ $21, $20, $20
ADD2 $f22, $f23, $f17
unop
ST $f18, 0*SIZE($20)
SXADDQ $21, $24, $24
ADD1 $f24, $f25, $f18
unop
ST $f19, 1*SIZE($20)
unop
ADD2 $f26, $f27, $f19
SXADDQ $21, $20, $20
ADD $f16, $f12, $f16
unop
LD $f12, 0*SIZE($24)
unop
ADD $f17, $f13, $f17
unop
LD $f13, 1*SIZE($24)
SXADDQ $21, $24, $24
ADD $f18, $f14, $f18
subq $4, 1, $4
LD $f14, 0*SIZE($24)
unop
ADD $f19, $f15, $f19
unop
LD $f15, 1*SIZE($24)
SXADDQ $21, $24, $24
/* last two results of the group are written back */
ST $f16, 0*SIZE($20)
ST $f17, 1*SIZE($20)
SXADDQ $21, $20, $20
unop
ST $f18, 0*SIZE($20)
ST $f19, 1*SIZE($20)
SXADDQ $21, $20, $20
bgt $4, $SubMainLoop
.align 4
/* Drain of the strided pipeline: finish and store the last preloaded group
   of four elements without issuing further loads. */
$SubMainLoopEnd:
MUL $f29, $f0, $f20
MUL $f30, $f1, $f21
MUL $f30, $f0, $f22
MUL $f29, $f1, $f23
MUL $f29, $f2, $f24
MUL $f30, $f3, $f25
MUL $f30, $f2, $f26
MUL $f29, $f3, $f27
ADD1 $f20, $f21, $f16
MUL $f29, $f4, $f20
ADD2 $f22, $f23, $f17
MUL $f30, $f5, $f21
ADD1 $f24, $f25, $f18
MUL $f30, $f4, $f22
ADD2 $f26, $f27, $f19
MUL $f29, $f5, $f23
ADD $f16, $f8, $f16
MUL $f29, $f6, $f24
ADD $f17, $f28, $f17
MUL $f30, $f7, $f25
ADD $f18, $f10, $f18
MUL $f30, $f6, $f26
ADD $f19, $f11, $f19
MUL $f29, $f7, $f27
ST $f16, 0*SIZE($20)
ADD1 $f20, $f21, $f16
ST $f17, 1*SIZE($20)
ADD2 $f22, $f23, $f17
SXADDQ $21, $20, $20
nop
ST $f18, 0*SIZE($20)
ADD1 $f24, $f25, $f18
ST $f19, 1*SIZE($20)
ADD2 $f26, $f27, $f19
SXADDQ $21, $20, $20
ADD $f16, $f12, $f16
ADD $f17, $f13, $f17
ADD $f18, $f14, $f18
ADD $f19, $f15, $f19
ST $f16, 0*SIZE($20)
ST $f17, 1*SIZE($20)
SXADDQ $21, $20, $20
ST $f18, 0*SIZE($20)
ST $f19, 1*SIZE($20)
SXADDQ $21, $20, $20
/* $5 = count mod 4; done if no leftover elements */
ble $5, $SubEnd
.align 4
/* Strided remainder: one complex element per pass. $24 is re-derived from
   $20 so that destination reads again lead the writes by one element. */
$SubRemain:
subq $5, 1, $6
ble $5, $SubEnd
LD $f0, 0*SIZE($18)
LD $f1, 1*SIZE($18)
LD $f8, 0*SIZE($20)
LD $f28, 1*SIZE($20)
SXADDQ $19, $18, $18
SXADDQ $21, $20, $24
ble $6, $SubRemainLoopEnd
.align 4
$SubRemainLoop:
MUL $f29, $f0, $f20
MUL $f30, $f1, $f21
MUL $f30, $f0, $f22
LD $f0, 0*SIZE($18)
MUL $f29, $f1, $f23
LD $f1, 1*SIZE($18)
ADD1 $f20, $f21, $f16
SXADDQ $19, $18, $18
ADD2 $f22, $f23, $f17
nop
ADD $f16, $f8, $f16
LD $f8, 0*SIZE($24)
ADD $f17, $f28, $f17
LD $f28, 1*SIZE($24)
SXADDQ $21, $24, $24
subq $6, 1, $6
ST $f16, 0*SIZE($20)
ST $f17, 1*SIZE($20)
SXADDQ $21, $20, $20
bgt $6, $SubRemainLoop
.align 4
/* Final leftover element of the strided path. */
$SubRemainLoopEnd:
MUL $f29, $f0, $f20
MUL $f30, $f1, $f21
MUL $f30, $f0, $f22
MUL $f29, $f1, $f23
ADD1 $f20, $f21, $f16
ADD2 $f22, $f23, $f17
ADD $f16, $f8, $f16
ADD $f17, $f28, $f17
ST $f16, 0*SIZE($20)
nop
ST $f17, 1*SIZE($20)
nop
.align 4
/* Exit for the strided path: reload $f2-$f8, release the 64-byte save area
   and return. */
$SubEnd:
ldt $f2, 0($sp)
ldt $f3, 8($sp)
ldt $f4, 16($sp)
ldt $f5, 24($sp)
ldt $f6, 32($sp)
ldt $f7, 40($sp)
ldt $f8, 48($sp)
lda $sp, 64($sp)
ret
EPILOGUE

500
kernel/alpha/zdot.S Normal file
View File

@@ -0,0 +1,500 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Prefetch distance (in elements, scaled by SIZE) used inside $L22. */
#define PREFETCHSIZE 88
/* Integer registers: element count, the two vector pointers and strides. */
#define N $16
#define X $17
#define INCX $18
#define Y $19
#define INCY $20
/* Secondary pointers used by the remainder loop to load behind X and Y. */
#define XX $21
#define YY $23
/* Loop counter. */
#define I $5
/* Four running sums: s0 += x.re*y.re, s1 += x.re*y.im, s2 += x.im*y.re,
   s3 += x.im*y.im. They are combined into the result at $L998. */
#define s0 $f0
#define s1 $f1
#define s2 $f2
#define s3 $f30
/* a0..a7 hold four (re, im) pairs of X; b0..b7 the matching pairs of Y. */
#define a0 $f10
#define a1 $f11
#define a2 $f12
#define a3 $f13
#define a4 $f14
#define a5 $f15
#define a6 $f16
#define a7 $f17
#define b0 $f18
#define b1 $f19
#define b2 $f20
#define b3 $f21
#define b4 $f22
#define b5 $f23
#define b6 $f24
#define b7 $f25
/* t0..t3 carry the most recent products until the next ADD folds them in. */
#define t0 $f26
#define t1 $f27
#define t2 $f28
#define t3 $f29
/* Complex dot product of X and Y over N elements. The result is left in
   s0 (real) and s1 (imaginary); both are zero when N <= 0. */
PROLOGUE
PROFCODE
.frame $sp, 16, $26, 0
lda $sp, -16($sp)
fclr s0
/* s2 lives in $f2, which is saved here and reloaded at $L999 */
stt $f2, 0($sp)
fclr s1
fclr s2
/* strides are doubled because each complex element spans two reals */
addq INCX, INCX, INCX
fclr s3
ble N, $L999
addq INCY, INCY, INCY
/* products start at zero so that the first ADDs in the loops are no-ops */
fclr t0
fclr t1
fclr t2
fclr t3
/* I = number of 8-element groups */
srl N, 3, I
ble I, $L25
/* Preload the first three pairs of each vector plus the real half of the
   fourth; the imaginary halves (a7, b7) are loaded at the top of the loop. */
LD a0, 0 * SIZE(X)
LD a1, 1 * SIZE(X)
LD b0, 0 * SIZE(Y)
LD b1, 1 * SIZE(Y)
SXADDQ INCX, X, X
SXADDQ INCY, Y, Y
LD a2, 0 * SIZE(X)
LD a3, 1 * SIZE(X)
LD b2, 0 * SIZE(Y)
LD b3, 1 * SIZE(Y)
SXADDQ INCX, X, X
SXADDQ INCY, Y, Y
LD a4, 0 * SIZE(X)
LD a5, 1 * SIZE(X)
LD b4, 0 * SIZE(Y)
LD b5, 1 * SIZE(Y)
SXADDQ INCX, X, X
SXADDQ INCY, Y, Y
LD a6, 0 * SIZE(X)
LD b6, 0 * SIZE(Y)
subq I, 1, I
ble I, $L23
.align 4
/* Main loop: eight complex elements per pass, software pipelined. Every ADD
   folds in the product issued four instructions pairs earlier, and each
   register pair is reloaded as soon as its last product has been issued.
   I counts the remaining passes. */
$L22:
ADD s0, t0, s0
LD a7, 1 * SIZE(X)
MUL a0, b0, t0
LD b7, 1 * SIZE(Y)
ADD s1, t1, s1
/* loads into $31 discard their data; presumably issued as prefetches */
ldl $31, PREFETCHSIZE * SIZE(X)
MUL a0, b1, t1
SXADDQ INCX, X, X
ADD s2, t2, s2
ldl $31, PREFETCHSIZE * SIZE(Y)
MUL a1, b0, t2
SXADDQ INCY, Y, Y
ADD s3, t3, s3
LD a0, 0 * SIZE(X)
MUL a1, b1, t3
LD a1, 1 * SIZE(X)
ADD s0, t0, s0
LD b0, 0 * SIZE(Y)
MUL a2, b2, t0
LD b1, 1 * SIZE(Y)
ADD s1, t1, s1
SXADDQ INCX, X, X
MUL a2, b3, t1
SXADDQ INCY, Y, Y
ADD s2, t2, s2
unop
MUL a3, b2, t2
unop
ADD s3, t3, s3
LD a2, 0 * SIZE(X)
MUL a3, b3, t3
LD a3, 1 * SIZE(X)
ADD s0, t0, s0
LD b2, 0 * SIZE(Y)
MUL a4, b4, t0
LD b3, 1 * SIZE(Y)
ADD s1, t1, s1
SXADDQ INCX, X, X
MUL a4, b5, t1
SXADDQ INCY, Y, Y
ADD s2, t2, s2
unop
MUL a5, b4, t2
unop
ADD s3, t3, s3
LD a4, 0 * SIZE(X)
MUL a5, b5, t3
LD a5, 1 * SIZE(X)
ADD s0, t0, s0
LD b4, 0 * SIZE(Y)
MUL a6, b6, t0
LD b5, 1 * SIZE(Y)
ADD s1, t1, s1
SXADDQ INCX, X, X
MUL a6, b7, t1
SXADDQ INCY, Y, Y
ADD s2, t2, s2
unop
MUL a7, b6, t2
unop
ADD s3, t3, s3
LD a6, 0 * SIZE(X)
MUL a7, b7, t3
LD a7, 1 * SIZE(X)
/* second half of the pass: elements five to eight of the group */
ADD s0, t0, s0
LD b6, 0 * SIZE(Y)
MUL a0, b0, t0
LD b7, 1 * SIZE(Y)
ADD s1, t1, s1
SXADDQ INCX, X, X
MUL a0, b1, t1
SXADDQ INCY, Y, Y
ADD s2, t2, s2
unop
MUL a1, b0, t2
unop
ADD s3, t3, s3
LD a0, 0 * SIZE(X)
MUL a1, b1, t3
LD a1, 1 * SIZE(X)
ADD s0, t0, s0
LD b0, 0 * SIZE(Y)
MUL a2, b2, t0
LD b1, 1 * SIZE(Y)
ADD s1, t1, s1
SXADDQ INCX, X, X
MUL a2, b3, t1
SXADDQ INCY, Y, Y
ADD s2, t2, s2
unop
MUL a3, b2, t2
unop
ADD s3, t3, s3
LD a2, 0 * SIZE(X)
MUL a3, b3, t3
LD a3, 1 * SIZE(X)
ADD s0, t0, s0
LD b2, 0 * SIZE(Y)
MUL a4, b4, t0
LD b3, 1 * SIZE(Y)
ADD s1, t1, s1
SXADDQ INCX, X, X
MUL a4, b5, t1
SXADDQ INCY, Y, Y
ADD s2, t2, s2
unop
MUL a5, b4, t2
subq I, 1, I
ADD s3, t3, s3
LD a4, 0 * SIZE(X)
MUL a5, b5, t3
LD a5, 1 * SIZE(X)
ADD s0, t0, s0
LD b4, 0 * SIZE(Y)
MUL a6, b6, t0
LD b5, 1 * SIZE(Y)
ADD s1, t1, s1
SXADDQ INCX, X, X
MUL a6, b7, t1
SXADDQ INCY, Y, Y
ADD s2, t2, s2
/* a6/b6 of the next group are loaded here; a7/b7 follow at the loop top */
LD a6, 0 * SIZE(X)
MUL a7, b6, t2
unop
ADD s3, t3, s3
LD b6, 0 * SIZE(Y)
MUL a7, b7, t3
bgt I, $L22
.align 4
/* Drain of the main loop: processes the final group of eight elements. The
   first half still loads the second four pairs of the group; the second half
   only multiplies and accumulates. The last four products are left in
   t0..t3 for the code that follows to fold in. */
$L23:
ADD s0, t0, s0
LD a7, 1 * SIZE(X)
MUL a0, b0, t0
LD b7, 1 * SIZE(Y)
ADD s1, t1, s1
SXADDQ INCX, X, X
MUL a0, b1, t1
SXADDQ INCY, Y, Y
ADD s2, t2, s2
unop
MUL a1, b0, t2
unop
ADD s3, t3, s3
LD a0, 0 * SIZE(X)
MUL a1, b1, t3
LD a1, 1 * SIZE(X)
ADD s0, t0, s0
LD b0, 0 * SIZE(Y)
MUL a2, b2, t0
LD b1, 1 * SIZE(Y)
ADD s1, t1, s1
SXADDQ INCX, X, X
MUL a2, b3, t1
SXADDQ INCY, Y, Y
ADD s2, t2, s2
unop
MUL a3, b2, t2
unop
ADD s3, t3, s3
LD a2, 0 * SIZE(X)
MUL a3, b3, t3
LD a3, 1 * SIZE(X)
ADD s0, t0, s0
LD b2, 0 * SIZE(Y)
MUL a4, b4, t0
LD b3, 1 * SIZE(Y)
ADD s1, t1, s1
SXADDQ INCX, X, X
MUL a4, b5, t1
SXADDQ INCY, Y, Y
ADD s2, t2, s2
unop
MUL a5, b4, t2
unop
ADD s3, t3, s3
LD a4, 0 * SIZE(X)
MUL a5, b5, t3
LD a5, 1 * SIZE(X)
ADD s0, t0, s0
LD b4, 0 * SIZE(Y)
MUL a6, b6, t0
LD b5, 1 * SIZE(Y)
ADD s1, t1, s1
SXADDQ INCX, X, X
MUL a6, b7, t1
SXADDQ INCY, Y, Y
ADD s2, t2, s2
unop
MUL a7, b6, t2
unop
ADD s3, t3, s3
LD a6, 0 * SIZE(X)
MUL a7, b7, t3
LD a7, 1 * SIZE(X)
ADD s0, t0, s0
LD b6, 0 * SIZE(Y)
MUL a0, b0, t0
LD b7, 1 * SIZE(Y)
ADD s1, t1, s1
/* X and Y now point just past the group, where the remainder begins */
SXADDQ INCX, X, X
MUL a0, b1, t1
SXADDQ INCY, Y, Y
ADD s2, t2, s2
MUL a1, b0, t2
ADD s3, t3, s3
MUL a1, b1, t3
ADD s0, t0, s0
MUL a2, b2, t0
ADD s1, t1, s1
MUL a2, b3, t1
ADD s2, t2, s2
MUL a3, b2, t2
ADD s3, t3, s3
MUL a3, b3, t3
ADD s0, t0, s0
MUL a4, b4, t0
ADD s1, t1, s1
MUL a4, b5, t1
ADD s2, t2, s2
MUL a5, b4, t2
ADD s3, t3, s3
MUL a5, b5, t3
ADD s0, t0, s0
MUL a6, b6, t0
ADD s1, t1, s1
MUL a6, b7, t1
ADD s2, t2, s2
MUL a7, b6, t2
ADD s3, t3, s3
MUL a7, b7, t3
.align 4
/* Remainder: the N mod 8 elements left after the unrolled loop, handled one
   complex element per pass. */
$L25:
and N, 7, I
unop
unop
ble I, $L998
LD a0, 0 * SIZE(X)
LD a1, 1 * SIZE(X)
LD b0, 0 * SIZE(Y)
LD b1, 1 * SIZE(Y)
SXADDQ INCX, X, X
subq I, 1, I
SXADDQ INCY, Y, Y
ble I, $L28
.align 4
$L26:
ADD s0, t0, s0
/* XX/YY keep the current element address so X/Y can be stepped right away */
mov X, XX
MUL a0, b0, t0
mov Y, YY
ADD s1, t1, s1
SXADDQ INCX, X, X
MUL a0, b1, t1
SXADDQ INCY, Y, Y
ADD s2, t2, s2
LD a0, 0 * SIZE(XX)
MUL a1, b0, t2
LD b0, 0 * SIZE(YY)
ADD s3, t3, s3
subq I, 1, I
MUL a1, b1, t3
LD a1, 1 * SIZE(XX)
LD b1, 1 * SIZE(YY)
bgt I, $L26
.align 4
/* Last remainder element: multiply only, no further loads. */
$L28:
ADD s0, t0, s0
MUL a0, b0, t0
ADD s1, t1, s1
MUL a0, b1, t1
ADD s2, t2, s2
MUL a1, b0, t2
ADD s3, t3, s3
MUL a1, b1, t3
.align 4
/* Fold in the four products still pending in t0..t3, then combine the four
   partial sums into the complex result (s0 = real, s1 = imaginary). */
$L998:
ADD s0, t0, s0
ADD s1, t1, s1
ADD s2, t2, s2
ADD s3, t3, s3
#ifndef CONJ
/* plain product: re = s0 - s3, im = s1 + s2 */
SUB s0, s3, s0
ADD s1, s2, s1
#else
/* CONJ build: re = s0 + s3, im = s1 - s2 */
ADD s0, s3, s0
SUB s1, s2, s1
#endif
.align 4
/* Common exit: reload $f2, release the 16-byte frame and return. */
$L999:
ldt $f2, 0($sp)
lda $sp, 16($sp)
ret
EPILOGUE

192
kernel/alpha/zgemm_beta.S Normal file
View File

@@ -0,0 +1,192 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Scales a complex matrix in place by the complex scalar ($f19, $f20).
   $16 is the number of complex elements per column and $17 the number of
   columns; the matrix pointer and its column stride are read from the stack.
   When both halves of the scalar are zero the matrix is filled with zeros
   instead of being multiplied. Always returns 0 in $0. */
.set noat
.set noreorder
.text
.align 5
.globl CNAME
.ent CNAME
CNAME:
.frame $sp, 0, $26, 0
#ifdef PROFILE
ldgp $gp, 0($27)
lda $28, _mcount
jsr $28, ($28), _mcount
.prologue 1
#else
.prologue 0
#endif
/* $18 = matrix pointer */
ldq $18, 24($sp)
ble $16, $End
/* $19 = column stride, doubled below because elements are complex */
ldl $19, 32($sp)
ble $17, $End
addq $19, $19, $19
/* any non-zero half of the scalar selects the multiply path */
fbne $f19,$Main
fbne $f20,$Main
.align 4
/* Zero-fill path, outer loop: one column per pass. */
$L13:
mov $18, $1
lda $17, -1($17)
SXADDQ $19, $18, $18
mov $16, $2
.align 4
/* Inner loop: store zero ($f31) to both halves of one complex element. */
$L12:
ST $f31, 0*SIZE($1)
ST $f31, 1*SIZE($1)
lda $2, -1($2)
lda $1, 2*SIZE($1)
bgt $2, $L12
bgt $17,$L13
clr $0
ret
.align 4
/* Main Routine */
/* Multiply path, outer loop: one column per pass, two complex elements per
   inner pass, with a single-element tail at $L18 when the row count is odd. */
$Main:
sra $16, 1, $2 # $2 = (m >> 1)
mov $18, $1 # c_offset = c
lda $17, -1($17) # n --
SXADDQ $19, $18, $18 # c += ldc
beq $2, $L18
/* preload the first two complex elements of the column */
LD $f14, 0*SIZE($1)
LD $f15, 1*SIZE($1)
LD $f24, 2*SIZE($1)
LD $f25, 3*SIZE($1)
lda $2, -1($2) # $2 --
ble $2, $L19
.align 4
/* Pipelined inner loop: scale the two elements held in registers while
   loading the next two. */
$L23:
MUL $f19, $f14, $f10
/* load into $f31 discards its data; presumably issued as a prefetch */
lds $f31, 9*SIZE($1)
MUL $f20, $f15, $f11
lda $2, -1($2)
MUL $f19, $f15, $f12
LD $f15, 5*SIZE($1)
MUL $f20, $f14, $f13
LD $f14, 4*SIZE($1)
MUL $f19, $f24, $f16
unop
MUL $f20, $f25, $f17
unop
MUL $f19, $f25, $f18
LD $f25, 7*SIZE($1)
/* re = $f19*re - $f20*im */
SUB $f10, $f11, $f22
unop
MUL $f20, $f24, $f21
LD $f24, 6*SIZE($1)
/* im = $f19*im + $f20*re */
ADD $f12, $f13, $f23
/* pointer advances before the stores, hence the negative offsets */
lda $1, 4*SIZE($1)
SUB $f16, $f17, $f26
ADD $f18, $f21, $f27
ST $f22,-4*SIZE($1)
ST $f23,-3*SIZE($1)
ST $f26,-2*SIZE($1)
ST $f27,-1*SIZE($1)
unop
bgt $2,$L23
.align 4
/* Drain: scale and store the last preloaded pair of elements. */
$L19:
MUL $f19, $f14, $f10
MUL $f20, $f15, $f11
MUL $f19, $f15, $f12
MUL $f20, $f14, $f13
MUL $f19, $f24, $f16
MUL $f20, $f25, $f17
MUL $f19, $f25, $f18
MUL $f20, $f24, $f21
SUB $f10, $f11, $f22
ADD $f12, $f13, $f23
SUB $f16, $f17, $f26
ADD $f18, $f21, $f27
lda $1, 4*SIZE($1)
ST $f22, -4*SIZE($1)
ST $f23, -3*SIZE($1)
ST $f26, -2*SIZE($1)
ST $f27, -1*SIZE($1)
/* low bit of the row count set: one element is still left in this column */
blbs $16, $L18
bgt $17, $Main
clr $0
ret
.align 4
/* Tail for an odd row count: scale a single complex element. */
$L18:
LD $f14, 0*SIZE($1)
LD $f15, 1*SIZE($1)
MUL $f19, $f15, $f13
MUL $f20, $f14, $f10
MUL $f19, $f14, $f12
MUL $f20, $f15, $f11
/* $f26 is the imaginary part, $f27 the real part */
ADD $f13, $f10, $f26
SUB $f12, $f11, $f27
ST $f26, 1*SIZE($1)
ST $f27, 0*SIZE($1)
lda $1, 2*SIZE($1)
bgt $17, $Main
.align 4
$End:
clr $0
ret
.ident VERSION
.end CNAME

File diff suppressed because it is too large Load Diff

1027
kernel/alpha/zgemv_n.S Normal file

File diff suppressed because it is too large Load Diff

922
kernel/alpha/zgemv_t.S Normal file
View File

@@ -0,0 +1,922 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Bytes of stack reserved by the routine; holds the eight saved registers
   $f2-$f9. */
#define STACKSIZE 64
/* Prefetch distance (in elements, scaled by SIZE) used by the inner loops. */
#define PREFETCHSIZE 32
/* Integer registers: matrix dimensions, matrix pointer and column stride,
   the two vectors with their strides, and a scratch buffer for packing X. */
#define M $16
#define N $17
#define A $21
#define LDA $18
#define X $19
#define INCX $20
#define Y $22
#define INCY $23
#define BUFFER $24
/* I counts inner (row) iterations, J counts column pairs. */
#define I $25
#define J $27
/* Working pointers: X1 walks X, Y1 is the store pointer into Y (and the
   pack destination), A1 and A2 walk the two columns being processed. */
#define X1 $3
#define Y1 $4
#define A1 $5
#define A2 $6
#define alpha_r $f19
#define alpha_i $f20
/* s0/s1 = real/imaginary sum for column A1, s2/s3 for column A2 (in the
   single-column loop s2/s3 serve as a second pair of sums for A1). */
#define s0 $f0
#define s1 $f1
#define s2 $f10
#define s3 $f11
/* t0..t3 carry the most recent products until the next ADDn folds them in. */
#define t0 $f12
#define t1 $f13
#define t2 $f14
#define t3 $f15
/* x0..x3 hold two (re, im) pairs of X. */
#define x0 $f16
#define x1 $f17
#define x2 $f18
#define x3 $f21
/* a0..a15 hold matrix elements; a8..a15 map onto $f2-$f9, which the
   prologue saves and the epilogue restores. */
#define a0 $f22
#define a1 $f23
#define a2 $f24
#define a3 $f25
#define a4 $f26
#define a5 $f27
#define a6 $f28
#define a7 $f29
#define a8 $f2
#define a9 $f3
#define a10 $f4
#define a11 $f5
#define a12 $f6
#define a13 $f7
#define a14 $f8
#define a15 $f9
/* Sign selection for the four partial products of a complex multiply, as
   used by the loops: ADD1 folds x.re*a.re and ADD3 folds x.im*a.im into the
   real sum; ADD2 folds x.re*a.im and ADD4 folds x.im*a.re into the imaginary
   sum. CONJ and XCONJ choose which of them subtract. */
#if !defined(CONJ) && !defined(XCONJ)
#define ADD1 ADD
#define ADD2 ADD
#define ADD3 SUB
#define ADD4 ADD
#elif !defined(CONJ) && defined(XCONJ)
#define ADD1 ADD
#define ADD2 ADD
#define ADD3 ADD
#define ADD4 SUB
#elif defined(CONJ) && !defined(XCONJ)
#define ADD1 ADD
#define ADD2 SUB
#define ADD3 ADD
#define ADD4 ADD
#else
#define ADD1 ADD
#define ADD2 SUB
#define ADD3 SUB
#define ADD4 SUB
#endif
/* Complex matrix-vector kernel over the columns of A: for each column j the
   sum over rows of A(i,j)*x(i) is formed, multiplied by alpha and added to
   y(j). This first part loads the stack arguments, saves $f2-$f9, and packs
   X into BUFFER when X is not contiguous. */
PROLOGUE
lda $sp, -STACKSIZE($sp)
/* stack arguments sit above the frame just allocated */
ldq LDA, 0 + STACKSIZE($sp)
ldq X, 8 + STACKSIZE($sp)
ldq INCX, 16 + STACKSIZE($sp)
ldq Y, 24 + STACKSIZE($sp)
ldq INCY, 32 + STACKSIZE($sp)
ldq BUFFER, 40 + STACKSIZE($sp)
stt $f2, 0($sp)
stt $f3, 8($sp)
stt $f4, 16($sp)
stt $f5, 24($sp)
stt $f6, 32($sp)
stt $f7, 40($sp)
stt $f8, 48($sp)
stt $f9, 56($sp)
PROFCODE
cmple M, 0, $0
/* Strides are shifted by ZBASE_SHIFT; afterwards they are added directly to
   pointers and compared against 2 * SIZE, i.e. used as byte strides. */
sll INCX, ZBASE_SHIFT, INCX
cmple N, 0, $1
sll INCY, ZBASE_SHIFT, INCY
/* return immediately when either dimension is non-positive */
or $0, $1, $0
bne $0, $L999
/* $0 = X is already contiguous, so no packing is needed */
cmpeq INCX, 2 * SIZE, $0
mov X, X1
sll LDA, ZBASE_SHIFT,LDA
bne $0, $L10
/* Pack X into BUFFER; from here on X refers to the packed copy. */
sra M, 2, I
mov BUFFER, Y1
mov BUFFER, X
ble I, $L05
.align 4
/* Pack loop: four complex elements per pass. */
$L02:
ldl $31, (PREFETCHSIZE + 0) * SIZE(X1)
lda I, -1(I)
LD a0, 0 * SIZE(X1)
LD a1, 1 * SIZE(X1)
addq X1, INCX, X1
LD a2, 0 * SIZE(X1)
LD a3, 1 * SIZE(X1)
addq X1, INCX, X1
ST a0, 0 * SIZE(Y1)
ST a1, 1 * SIZE(Y1)
ST a2, 2 * SIZE(Y1)
ST a3, 3 * SIZE(Y1)
LD a4, 0 * SIZE(X1)
LD a5, 1 * SIZE(X1)
addq X1, INCX, X1
LD a6, 0 * SIZE(X1)
LD a7, 1 * SIZE(X1)
addq X1, INCX, X1
ST a4, 4 * SIZE(Y1)
ST a5, 5 * SIZE(Y1)
ST a6, 6 * SIZE(Y1)
ST a7, 7 * SIZE(Y1)
lda Y1, 8 * SIZE(Y1)
bgt I, $L02
.align 4
/* Pack remainder: the M mod 4 elements left, one per pass. */
$L05:
and M, 3, I
ble I, $L10
.align 4
$L06:
LD a0, 0 * SIZE(X1)
LD a1, 1 * SIZE(X1)
addq X1, INCX, X1
ST a0, 0 * SIZE(Y1)
ST a1, 1 * SIZE(Y1)
lda Y1, 2 * SIZE(Y1)
lda I, -1(I)
bgt I, $L06
.align 4
/* Start of the column loops. Y1 is the store pointer into Y, while Y itself
   is used for the loads. J = number of column pairs. */
$L10:
mov Y, Y1
/* pending products start at zero so the first ADDs in the loops are no-ops */
fclr t0
unop
fclr t1
sra N, 1, J
fclr t2
fclr t3
ble J, $L20
.align 4
/* Outer loop over column pairs: A1 and A2 are the two columns, A is stepped
   past both, and the four sums are cleared. */
$L11:
mov A, A1
fclr s0
addq A, LDA, A2
fclr s1
addq A2, LDA, A
unop
mov X, X1
/* load into $f31 discards its data; presumably a prefetch of Y */
lds $f31, 3 * SIZE(Y)
/* I = number of 4-row groups */
sra M, 2, I
fclr s2
fclr s3
ble I, $L15
/* Preload four rows of both columns (a0..a15) and the first three reals of
   X; x3 is loaded at the top of the loop. */
LD a0, 0 * SIZE(A1)
LD a1, 1 * SIZE(A1)
LD a2, 0 * SIZE(A2)
LD a3, 1 * SIZE(A2)
LD a4, 2 * SIZE(A1)
LD a5, 3 * SIZE(A1)
LD a6, 2 * SIZE(A2)
LD a7, 3 * SIZE(A2)
LD a8, 4 * SIZE(A1)
LD a9, 5 * SIZE(A1)
LD a10, 4 * SIZE(A2)
LD a11, 5 * SIZE(A2)
LD a12, 6 * SIZE(A1)
LD a13, 7 * SIZE(A1)
LD a14, 6 * SIZE(A2)
LD a15, 7 * SIZE(A2)
LD x0, 0 * SIZE(X1)
LD x1, 1 * SIZE(X1)
LD x2, 2 * SIZE(X1)
lda I, -1(I)
ble I, $L13
.align 4
/* Two-column inner loop: four rows per pass, software pipelined. Each X
   value is multiplied against the matching element of both columns, and each
   matrix register is reloaded for the next pass right after its last use.
   A1, A2 and X1 are advanced mid-pass, so the loads that follow the advance
   use offsets that are 8 smaller than those before it. */
$L12:
ADD3 s0, t0, s0
unop
MUL x0, a0, t0
LD x3, 3 * SIZE(X1)
ADD4 s1, t1, s1
/* loads into $31 discard their data; presumably issued as prefetches */
ldl $31, (PREFETCHSIZE + 0) * SIZE(A1)
MUL x0, a1, t1
unop
ADD3 s2, t2, s2
unop
MUL x0, a2, t2
unop
ADD4 s3, t3, s3
unop
MUL x0, a3, t3
LD x0, 4 * SIZE(X1)
ADD1 s0, t0, s0
unop
MUL x1, a1, t0
LD a1, 9 * SIZE(A1)
ADD2 s1, t1, s1
unop
MUL x1, a0, t1
LD a0, 8 * SIZE(A1)
ADD1 s2, t2, s2
unop
MUL x1, a3, t2
LD a3, 9 * SIZE(A2)
ADD2 s3, t3, s3
unop
MUL x1, a2, t3
LD a2, 8 * SIZE(A2)
ADD3 s0, t0, s0
unop
MUL x2, a4, t0
LD x1, 5 * SIZE(X1)
ADD4 s1, t1, s1
MUL x2, a5, t1
ADD3 s2, t2, s2
MUL x2, a6, t2
ADD4 s3, t3, s3
unop
MUL x2, a7, t3
LD x2, 6 * SIZE(X1)
ADD1 s0, t0, s0
unop
MUL x3, a5, t0
LD a5, 11 * SIZE(A1)
ADD2 s1, t1, s1
unop
MUL x3, a4, t1
LD a4, 10 * SIZE(A1)
ADD1 s2, t2, s2
unop
MUL x3, a7, t2
LD a7, 11 * SIZE(A2)
ADD2 s3, t3, s3
unop
MUL x3, a6, t3
LD a6, 10 * SIZE(A2)
ADD3 s0, t0, s0
unop
MUL x0, a8, t0
LD x3, 7 * SIZE(X1)
ADD4 s1, t1, s1
ldl $31, (PREFETCHSIZE + 0) * SIZE(A2)
MUL x0, a9, t1
unop
ADD3 s2, t2, s2
lda I, -1(I)
MUL x0, a10, t2
unop
ADD4 s3, t3, s3
unop
MUL x0, a11, t3
LD x0, 8 * SIZE(X1)
ADD1 s0, t0, s0
unop
MUL x1, a9, t0
LD a9, 13 * SIZE(A1)
ADD2 s1, t1, s1
unop
MUL x1, a8, t1
LD a8, 12 * SIZE(A1)
ADD1 s2, t2, s2
/* A1 advances here */
lda A1, 8 * SIZE(A1)
MUL x1, a11, t2
LD a11, 13 * SIZE(A2)
ADD2 s3, t3, s3
unop
MUL x1, a10, t3
LD a10, 12 * SIZE(A2)
ADD3 s0, t0, s0
unop
MUL x2, a12, t0
LD x1, 9 * SIZE(X1)
ADD4 s1, t1, s1
ldl $31, (PREFETCHSIZE + 0) * SIZE(X1)
MUL x2, a13, t1
/* A2 advances here */
lda A2, 8 * SIZE(A2)
ADD3 s2, t2, s2
unop
MUL x2, a14, t2
unop
ADD4 s3, t3, s3
unop
MUL x2, a15, t3
LD x2, 10 * SIZE(X1)
ADD1 s0, t0, s0
unop
MUL x3, a13, t0
LD a13, 7 * SIZE(A1)
ADD2 s1, t1, s1
/* X1 advances here */
lda X1, 8 * SIZE(X1)
MUL x3, a12, t1
LD a12, 6 * SIZE(A1)
ADD1 s2, t2, s2
unop
MUL x3, a15, t2
LD a15, 7 * SIZE(A2)
ADD2 s3, t3, s3
MUL x3, a14, t3
LD a14, 6 * SIZE(A2)
bgt I, $L12
.align 4
/* Drain of the two-column loop: processes the last preloaded group of four
   rows. Only the remaining X values are loaded; the matrix registers are
   already full. A1, A2 and X1 are advanced past the group so the remainder
   code can continue from them. The final four products stay in t0..t3. */
$L13:
ADD3 s0, t0, s0
unop
MUL x0, a0, t0
LD x3, 3 * SIZE(X1)
ADD4 s1, t1, s1
MUL x0, a1, t1
ADD3 s2, t2, s2
MUL x0, a2, t2
ADD4 s3, t3, s3
unop
MUL x0, a3, t3
LD x0, 4 * SIZE(X1)
ADD1 s0, t0, s0
MUL x1, a1, t0
ADD2 s1, t1, s1
MUL x1, a0, t1
ADD1 s2, t2, s2
unop
MUL x1, a3, t2
unop
ADD2 s3, t3, s3
lda A1, 8 * SIZE(A1)
MUL x1, a2, t3
LD x1, 5 * SIZE(X1)
ADD3 s0, t0, s0
MUL x2, a4, t0
ADD4 s1, t1, s1
MUL x2, a5, t1
ADD3 s2, t2, s2
unop
MUL x2, a6, t2
unop
ADD4 s3, t3, s3
lda A2, 8 * SIZE(A2)
MUL x2, a7, t3
LD x2, 6 * SIZE(X1)
ADD1 s0, t0, s0
MUL x3, a5, t0
ADD2 s1, t1, s1
MUL x3, a4, t1
ADD1 s2, t2, s2
unop
MUL x3, a7, t2
lda X1, 8 * SIZE(X1)
ADD2 s3, t3, s3
unop
MUL x3, a6, t3
/* X1 was advanced just above, so offset -1 is the last real of this group */
LD x3, -1 * SIZE(X1)
ADD3 s0, t0, s0
MUL x0, a8, t0
ADD4 s1, t1, s1
MUL x0, a9, t1
ADD3 s2, t2, s2
MUL x0, a10, t2
ADD4 s3, t3, s3
MUL x0, a11, t3
ADD1 s0, t0, s0
MUL x1, a9, t0
ADD2 s1, t1, s1
MUL x1, a8, t1
ADD1 s2, t2, s2
MUL x1, a11, t2
ADD2 s3, t3, s3
MUL x1, a10, t3
ADD3 s0, t0, s0
MUL x2, a12, t0
ADD4 s1, t1, s1
MUL x2, a13, t1
ADD3 s2, t2, s2
MUL x2, a14, t2
ADD4 s3, t3, s3
MUL x2, a15, t3
ADD1 s0, t0, s0
MUL x3, a13, t0
ADD2 s1, t1, s1
MUL x3, a12, t1
ADD1 s2, t2, s2
MUL x3, a15, t2
ADD2 s3, t3, s3
MUL x3, a14, t3
.align 4
/* Two-column remainder: the M mod 4 rows left after the unrolled loop, one
   row per pass. */
$L15:
and M, 3, I
ble I, $L18
LD a0, 0 * SIZE(A1)
LD a1, 1 * SIZE(A1)
LD a2, 0 * SIZE(A2)
LD a3, 1 * SIZE(A2)
LD x0, 0 * SIZE(X1)
lda I, -1(I)
ble I, $L17
.align 4
$L16:
ADD3 s0, t0, s0
lda I, -1(I)
MUL x0, a0, t0
LD x1, 1 * SIZE(X1)
ADD4 s1, t1, s1
MUL x0, a1, t1
ADD3 s2, t2, s2
MUL x0, a2, t2
ADD4 s3, t3, s3
unop
MUL x0, a3, t3
LD x0, 2 * SIZE(X1)
ADD1 s0, t0, s0
/* A2 advances before its reloads below, which therefore use offsets 0/1 */
lda A2, 2 * SIZE(A2)
MUL x1, a1, t0
LD a1, 3 * SIZE(A1)
ADD2 s1, t1, s1
lda X1, 2 * SIZE(X1)
MUL x1, a0, t1
LD a0, 2 * SIZE(A1)
ADD1 s2, t2, s2
/* A1 advances after its reloads above, which used offsets 2/3 */
lda A1, 2 * SIZE(A1)
MUL x1, a3, t2
LD a3, 1 * SIZE(A2)
ADD2 s3, t3, s3
MUL x1, a2, t3
LD a2, 0 * SIZE(A2)
bgt I, $L16
.align 4
/* Last remainder row: only the imaginary half of X is still to be loaded. */
$L17:
ADD3 s0, t0, s0
unop
MUL x0, a0, t0
LD x1, 1 * SIZE(X1)
ADD4 s1, t1, s1
unop
MUL x0, a1, t1
unop
ADD3 s2, t2, s2
MUL x0, a2, t2
ADD4 s3, t3, s3
MUL x0, a3, t3
ADD1 s0, t0, s0
MUL x1, a1, t0
ADD2 s1, t1, s1
MUL x1, a0, t1
ADD1 s2, t2, s2
MUL x1, a3, t2
ADD2 s3, t3, s3
MUL x1, a2, t3
.align 4
/* Write-back for a column pair: load the two Y elements, fold in the last
   pending products, then add alpha times each column sum to its Y element.
   Y is the load pointer and Y1 the store pointer; both step by INCY. */
$L18:
LD a0, 0 * SIZE(Y)
unop
LD a1, 1 * SIZE(Y)
addq Y, INCY, Y
LD a2, 0 * SIZE(Y)
unop
LD a3, 1 * SIZE(Y)
addq Y, INCY, Y
ADD3 s0, t0, s0
ADD4 s1, t1, s1
ADD3 s2, t2, s2
ADD4 s3, t3, s3
/* complex multiply by alpha, built up in two steps:
   re = y.re + alpha_r*s.re - alpha_i*s.im
   im = y.im + alpha_r*s.im + alpha_i*s.re */
MUL alpha_r, s0, t0
MUL alpha_r, s1, t1
MUL alpha_r, s2, t2
MUL alpha_r, s3, t3
ADD a0, t0, a0
MUL alpha_i, s1, t0
ADD a1, t1, a1
MUL alpha_i, s0, t1
ADD a2, t2, a2
MUL alpha_i, s3, t2
ADD a3, t3, a3
MUL alpha_i, s2, t3
SUB a0, t0, a0
ADD a1, t1, a1
SUB a2, t2, a2
ADD a3, t3, a3
ST a0, 0 * SIZE(Y1)
/* t0..t3 are cleared again so the next column starts with no pending
   products */
fclr t0
ST a1, 1 * SIZE(Y1)
addq Y1, INCY, Y1
ST a2, 0 * SIZE(Y1)
fclr t1
ST a3, 1 * SIZE(Y1)
addq Y1, INCY, Y1
fclr t2
lda J, -1(J)
fclr t3
bgt J, $L11
.align 4
/* Single leftover column, handled only when N is odd. Here s0/s1 and s2/s3
   are two pairs of partial sums for the same column (s0/s1 take the ADD3 and
   ADD4 terms, s2/s3 the ADD1 and ADD2 terms); they are merged at $L28. */
$L20:
/* low bit of N clear: no leftover column */
blbc N, $L999
mov A, A1
fclr s0
fclr s1
mov X, X1
sra M, 2, I
fclr s2
fclr s3
ble I, $L25
/* Preload four rows of the column and the first three reals of X. */
LD a0, 0 * SIZE(A1)
LD a1, 1 * SIZE(A1)
LD a4, 2 * SIZE(A1)
LD a5, 3 * SIZE(A1)
LD a8, 4 * SIZE(A1)
LD a9, 5 * SIZE(A1)
LD a12, 6 * SIZE(A1)
LD a13, 7 * SIZE(A1)
LD x0, 0 * SIZE(X1)
LD x1, 1 * SIZE(X1)
LD x2, 2 * SIZE(X1)
lda I, -1(I)
ble I, $L23
.align 4
/* Single-column inner loop: four rows per pass, software pipelined. Only t0
   and t1 carry products here. */
$L22:
ADD3 s0, t0, s0
/* load into $31 discards its data; presumably issued as a prefetch */
ldl $31, (PREFETCHSIZE + 0) * SIZE(A1)
MUL x0, a0, t0
LD x3, 3 * SIZE(X1)
ADD4 s1, t1, s1
unop
MUL x0, a1, t1
LD x0, 4 * SIZE(X1)
ADD1 s2, t0, s2
lda I, -1(I)
MUL x1, a1, t0
LD a1, 9 * SIZE(A1)
ADD2 s3, t1, s3
unop
MUL x1, a0, t1
LD a0, 8 * SIZE(A1)
ADD3 s0, t0, s0
unop
MUL x2, a4, t0
LD x1, 5 * SIZE(X1)
ADD4 s1, t1, s1
unop
MUL x2, a5, t1
LD x2, 6 * SIZE(X1)
ADD1 s2, t0, s2
unop
MUL x3, a5, t0
LD a5, 11 * SIZE(A1)
ADD2 s3, t1, s3
unop
MUL x3, a4, t1
LD a4, 10 * SIZE(A1)
ADD3 s0, t0, s0
unop
MUL x0, a8, t0
LD x3, 7 * SIZE(X1)
ADD4 s1, t1, s1
unop
MUL x0, a9, t1
LD x0, 8 * SIZE(X1)
ADD1 s2, t0, s2
unop
MUL x1, a9, t0
LD a9, 13 * SIZE(A1)
ADD2 s3, t1, s3
unop
MUL x1, a8, t1
LD a8, 12 * SIZE(A1)
ADD3 s0, t0, s0
unop
MUL x2, a12, t0
LD x1, 9 * SIZE(X1)
ADD4 s1, t1, s1
/* A1 advances here; the a12/a13 reloads below use the reduced offsets */
lda A1, 8 * SIZE(A1)
MUL x2, a13, t1
LD x2, 10 * SIZE(X1)
ADD1 s2, t0, s2
lda X1, 8 * SIZE(X1)
MUL x3, a13, t0
LD a13, 7 * SIZE(A1)
ADD2 s3, t1, s3
MUL x3, a12, t1
LD a12, 6 * SIZE(A1)
bgt I, $L22
.align 4
/* Drain of the single-column loop: processes the last preloaded group of
   four rows, loading only the remaining X values, and advances A1 and X1
   past the group. The last two products stay in t0/t1. */
$L23:
ADD3 s0, t0, s0
unop
MUL x0, a0, t0
LD x3, 3 * SIZE(X1)
ADD4 s1, t1, s1
unop
MUL x0, a1, t1
LD x0, 4 * SIZE(X1)
ADD1 s2, t0, s2
unop
MUL x1, a1, t0
lda A1, 8 * SIZE(A1)
ADD2 s3, t1, s3
unop
MUL x1, a0, t1
LD x1, 5 * SIZE(X1)
ADD3 s0, t0, s0
unop
MUL x2, a4, t0
unop
ADD4 s1, t1, s1
unop
MUL x2, a5, t1
LD x2, 6 * SIZE(X1)
ADD1 s2, t0, s2
unop
MUL x3, a5, t0
lda X1, 8 * SIZE(X1)
ADD2 s3, t1, s3
unop
MUL x3, a4, t1
/* X1 was advanced just above, so offset -1 is the last real of this group */
LD x3, -1 * SIZE(X1)
ADD3 s0, t0, s0
MUL x0, a8, t0
ADD4 s1, t1, s1
MUL x0, a9, t1
ADD1 s2, t0, s2
MUL x1, a9, t0
ADD2 s3, t1, s3
MUL x1, a8, t1
ADD3 s0, t0, s0
MUL x2, a12, t0
ADD4 s1, t1, s1
MUL x2, a13, t1
ADD1 s2, t0, s2
MUL x3, a13, t0
ADD2 s3, t1, s3
MUL x3, a12, t1
.align 4
/* Single-column remainder: the M mod 4 rows left, one row per pass. All
   four partial products are folded into s0/s1 here. */
$L25:
and M, 3, I
ble I, $L28
LD a0, 0 * SIZE(A1)
LD a1, 1 * SIZE(A1)
LD x0, 0 * SIZE(X1)
lda I, -1(I)
ble I, $L27
.align 4
$L26:
ADD3 s0, t0, s0
/* A1 advances first, so the reloads of a0/a1 below use offsets 0/1 */
lda A1, 2 * SIZE(A1)
MUL x0, a0, t0
LD x1, 1 * SIZE(X1)
ADD4 s1, t1, s1
lda I, -1(I)
MUL x0, a1, t1
LD x0, 2 * SIZE(X1)
ADD1 s0, t0, s0
lda X1, 2 * SIZE(X1)
MUL x1, a1, t0
LD a1, 1 * SIZE(A1)
ADD2 s1, t1, s1
MUL x1, a0, t1
LD a0, 0 * SIZE(A1)
bgt I, $L26
.align 4
/* Last remainder row. */
$L27:
ADD3 s0, t0, s0
unop
MUL x0, a0, t0
LD x1, 1 * SIZE(X1)
ADD4 s1, t1, s1
unop
MUL x0, a1, t1
unop
ADD1 s0, t0, s0
MUL x1, a1, t0
ADD2 s1, t1, s1
MUL x1, a0, t1
.align 4
/* Write-back for the single leftover column: fold in the pending products,
   merge the two pairs of partial sums, then add alpha times the sum to the
   Y element. */
$L28:
LD a0, 0 * SIZE(Y)
LD a1, 1 * SIZE(Y)
ADD3 s0, t0, s0
ADD4 s1, t1, s1
/* t2/t3 are not written by the single-column loops; they still hold the
   zeros stored by the fclr instructions earlier in the routine */
ADD3 s2, t2, s2
ADD4 s3, t3, s3
ADD s0, s2, s0
ADD s1, s3, s1
/* re = y.re + alpha_r*s.re - alpha_i*s.im
   im = y.im + alpha_r*s.im + alpha_i*s.re */
MUL alpha_r, s0, t0
MUL alpha_r, s1, t1
ADD a0, t0, a0
MUL alpha_i, s1, t0
ADD a1, t1, a1
MUL alpha_i, s0, t1
SUB a0, t0, a0
ADD a1, t1, a1
ST a0, 0 * SIZE(Y1)
ST a1, 1 * SIZE(Y1)
.align 4
/* Common exit: reload $f2-$f9, release the frame and return. */
$L999:
ldt $f2, 0($sp)
ldt $f3, 8($sp)
ldt $f4, 16($sp)
ldt $f5, 24($sp)
ldt $f6, 32($sp)
ldt $f7, 40($sp)
ldt $f8, 48($sp)
ldt $f9, 56($sp)
lda $sp, STACKSIZE($sp)
ret
EPILOGUE

426
kernel/alpha/znrm2.S Normal file
View File

@@ -0,0 +1,426 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Look-ahead distance, in units of SIZE, of the prefetch-style load */
/* issued once per iteration of the unit-stride main loop. */
#define PREFETCH_SIZE 80
/* Integer registers. N, X and INCX are read by the routine before */
/* anything writes them, i.e. they carry the incoming arguments: */
/* element count, vector pointer and stride. */
#define N $16
#define X $17
#define INCX $18
/* Snapshot of X taken inside the unrolled loops so that X itself can */
/* be advanced before the last loads of an iteration are issued. */
#define XX $19
/* Loop counter. */
#define I $0
/* Four independent partial sums of squares. a0 also holds the final */
/* result on the non-EV4/EV5 path. */
#define a0 $f0
#define a1 $f1
#define a2 $f10
#define a3 $f11
/* Squares that have been computed but not yet added into a0..a3. */
#define t0 $f12
#define t1 $f13
#define t2 $f14
#define t3 $f15
/* Vector components loaded from memory. */
#define x0 $f16
#define x1 $f17
#define x2 $f18
#define x3 $f19
#define x4 $f20
#define x5 $f21
#define x6 $f22
#define x7 $f23
/*
 * Square root of the sum of squares of every real and imaginary
 * component of a complex vector.
 *
 * Inputs (read before being written): N = number of complex elements,
 * X = pointer to the first element, INCX = stride between elements.
 * Output: a0 ($f0) on the non-EV4/EV5 path; on EV4/EV5 the external
 * sqrt routine is called and whatever it returns is returned unchanged.
 * For N <= 0 the routine returns with a0 cleared to zero.
 *
 * The squares are summed directly with no scaling step.
 * NOTE(review): this presumably means inputs of extreme magnitude can
 * overflow or underflow in the intermediate squares - confirm whether
 * callers rely on that being acceptable.
 *
 * Pipelining convention used throughout: every "addt aK, tK, aK" adds
 * the product that the PREVIOUS "mult ..., tK" produced, so each
 * product is accumulated one step late. The products still pending when
 * a loop ends are flushed at the end of $L12/$L22 (t2, t3) and at
 * $L998 (t0, t1).
 */
PROLOGUE
#if defined(EV4) || defined(EV5)
/* These targets call sqrt as a function, so a 16-byte frame is built */
/* to save the return address ($26) and the GP is set up for the call. */
.frame $30,16,$26,0
.mask 0x4000000,-16
ldah $29, 0($27) !gpdisp!1
lda $29, 0($29) !gpdisp!1
lda $sp, -16($sp)
ldq $27, sqrt($29) !literal!2
stq $26, 0($sp)
PROFCODE
.prologue 1
#else
PROFCODE
#endif
/* Clear the accumulators; the clears are interleaved with the integer */
/* set-up instructions. */
fclr a0
/* Scale the stride by 2^ZBASE_SHIFT. ZBASE_SHIFT comes from outside */
/* this file; presumably this converts complex elements to bytes. */
sll INCX, ZBASE_SHIFT, INCX
fclr a1
/* Nothing to do for N <= 0: return the cleared a0. */
ble N, $L999
fclr a2
/* A scaled stride of exactly 2 * SIZE means the elements are */
/* contiguous; any other stride takes the general path at $L20. */
cmpeq INCX, 2 * SIZE, $0
fclr a3
beq $0, $L20
/* ---- Contiguous path: 8 complex elements (16 values) per block ---- */
fclr t0
sra N, 3, I
fclr t1
/* Fewer than 8 elements: go straight to the one-at-a-time loop. */
ble I, $L15
fclr t2
/* Preload the first 8 values of the first block. */
LD x0, 0 * SIZE(X)
fclr t3
LD x1, 1 * SIZE(X)
LD x2, 2 * SIZE(X)
LD x3, 3 * SIZE(X)
LD x4, 4 * SIZE(X)
LD x5, 5 * SIZE(X)
LD x6, 6 * SIZE(X)
LD x7, 7 * SIZE(X)
/* The last block is handled by the drain code at $L12, so the loop */
/* below runs one time fewer than the block count. */
lda I, -1(I)
ble I, $L12
.align 4
/* Main loop: squares values 0..15 of the current block while loading */
/* values 8..15 of this block and 0..7 of the next one. */
$L11:
addt a0, t0, a0
/* Load into the zero register $31: the value is discarded, so this */
/* acts only as a prefetch of data PREFETCH_SIZE values ahead. */
ldl $31, (PREFETCH_SIZE) * SIZE(X)
mult x0, x0, t0
LD x0, 8 * SIZE(X)
addt a1, t1, a1
/* Keep the block base in XX; X is advanced further down. */
mov X, XX
mult x1, x1, t1
LD x1, 9 * SIZE(X)
addt a2, t2, a2
unop
mult x2, x2, t2
LD x2, 10 * SIZE(X)
addt a3, t3, a3
unop
mult x3, x3, t3
LD x3, 11 * SIZE(X)
addt a0, t0, a0
unop
mult x4, x4, t0
LD x4, 12 * SIZE(X)
addt a1, t1, a1
unop
mult x5, x5, t1
LD x5, 13 * SIZE(X)
addt a2, t2, a2
unop
mult x6, x6, t2
LD x6, 14 * SIZE(X)
addt a3, t3, a3
unop
mult x7, x7, t3
LD x7, 15 * SIZE(X)
addt a0, t0, a0
unop
mult x0, x0, t0
LD x0, 16 * SIZE(X)
addt a1, t1, a1
/* X now points at the next block; the remaining loads of this */
/* iteration are addressed through the saved base XX. */
lda X, 16 * SIZE(X)
mult x1, x1, t1
LD x1, 17 * SIZE(XX)
addt a2, t2, a2
unop
mult x2, x2, t2
LD x2, 18 * SIZE(XX)
addt a3, t3, a3
unop
mult x3, x3, t3
LD x3, 19 * SIZE(XX)
addt a0, t0, a0
unop
mult x4, x4, t0
LD x4, 20 * SIZE(XX)
addt a1, t1, a1
lda I, -1(I)
mult x5, x5, t1
LD x5, 21 * SIZE(XX)
addt a2, t2, a2
unop
mult x6, x6, t2
LD x6, 22 * SIZE(XX)
addt a3, t3, a3
mult x7, x7, t3
LD x7, 23 * SIZE(XX)
bgt I, $L11
.align 4
/* Drain for the last block: loads only values 8..15 (nothing beyond */
/* the block is touched) and finishes the pending squares. */
$L12:
addt a0, t0, a0
mov X, XX
mult x0, x0, t0
LD x0, 8 * SIZE(X)
addt a1, t1, a1
unop
mult x1, x1, t1
LD x1, 9 * SIZE(X)
addt a2, t2, a2
unop
mult x2, x2, t2
LD x2, 10 * SIZE(X)
addt a3, t3, a3
unop
mult x3, x3, t3
LD x3, 11 * SIZE(X)
addt a0, t0, a0
unop
mult x4, x4, t0
LD x4, 12 * SIZE(XX)
addt a1, t1, a1
unop
mult x5, x5, t1
LD x5, 13 * SIZE(XX)
addt a2, t2, a2
unop
mult x6, x6, t2
LD x6, 14 * SIZE(XX)
addt a3, t3, a3
/* Step X past the block so the remainder loop starts right after it. */
lda X, 16 * SIZE(X)
mult x7, x7, t3
LD x7, 15 * SIZE(XX)
addt a0, t0, a0
mult x0, x0, t0
addt a1, t1, a1
mult x1, x1, t1
addt a2, t2, a2
mult x2, x2, t2
addt a3, t3, a3
mult x3, x3, t3
addt a0, t0, a0
mult x4, x4, t0
addt a1, t1, a1
mult x5, x5, t1
addt a2, t2, a2
mult x6, x6, t2
addt a3, t3, a3
mult x7, x7, t3
/* Flush the last t2/t3 products here; t0/t1 stay pending and are */
/* picked up by the remainder loop or by $L998. */
addt a2, t2, a2
addt a3, t3, a3
.align 4
/* Contiguous remainder: the N & 7 elements left after the blocks. */
$L15:
and N, 7, I
ble I, $L998
.align 4
/* One complex element per iteration, accumulating into a0/a1 only. */
$L16:
LD x0, 0 * SIZE(X)
LD x1, 1 * SIZE(X)
lda X, 2 * SIZE(X)
addt a0, t0, a0
mult x0, x0, t0
addt a1, t1, a1
mult x1, x1, t1
lda I, -1(I)
bgt I, $L16
/* Jump to the final reduction; the return-address register is $31, */
/* so no link value is kept. */
bsr $31, $L998
.align 4
/* ---- General-stride path: 4 complex elements per block ---- */
$L20:
fclr t0
sra N, 2, I
fclr t1
/* Fewer than 4 elements: go straight to the one-at-a-time loop. */
ble I, $L25
/* Preload the first block except its last imaginary part (x7), which */
/* is loaded at the top of $L21/$L22. X advances by the scaled stride */
/* after each element. */
LD x0, 0 * SIZE(X)
fclr t2
LD x1, 1 * SIZE(X)
addq X, INCX, X
LD x2, 0 * SIZE(X)
fclr t3
LD x3, 1 * SIZE(X)
addq X, INCX, X
LD x4, 0 * SIZE(X)
lda I, -1(I)
LD x5, 1 * SIZE(X)
addq X, INCX, X
LD x6, 0 * SIZE(X)
ble I, $L22
.align 4
/* Main strided loop: squares the current block while loading x7 of */
/* this block and x0..x6 of the next one. */
$L21:
addt a0, t0, a0
LD x7, 1 * SIZE(X)
mult x0, x0, t0
addq X, INCX, X
addt a1, t1, a1
LD x0, 0 * SIZE(X)
mult x1, x1, t1
unop
addt a2, t2, a2
LD x1, 1 * SIZE(X)
mult x2, x2, t2
addq X, INCX, X
addt a3, t3, a3
LD x2, 0 * SIZE(X)
mult x3, x3, t3
unop
addt a0, t0, a0
LD x3, 1 * SIZE(X)
mult x4, x4, t0
addq X, INCX, X
addt a1, t1, a1
LD x4, 0 * SIZE(X)
mult x5, x5, t1
lda I, -1(I)
addt a2, t2, a2
LD x5, 1 * SIZE(X)
mult x6, x6, t2
addq X, INCX, X
addt a3, t3, a3
LD x6, 0 * SIZE(X)
mult x7, x7, t3
bgt I, $L21
.align 4
/* Drain for the last strided block: only x7 remains to be loaded. */
$L22:
addt a0, t0, a0
LD x7, 1 * SIZE(X)
mult x0, x0, t0
/* Step X past the block so the remainder loop starts right after it. */
addq X, INCX, X
addt a1, t1, a1
mult x1, x1, t1
addt a2, t2, a2
mult x2, x2, t2
addt a3, t3, a3
mult x3, x3, t3
addt a0, t0, a0
mult x4, x4, t0
addt a1, t1, a1
mult x5, x5, t1
addt a2, t2, a2
mult x6, x6, t2
addt a3, t3, a3
mult x7, x7, t3
/* Flush t2/t3; t0/t1 stay pending for the remainder loop or $L998. */
addt a2, t2, a2
addt a3, t3, a3
.align 4
/* Strided remainder: the N & 3 elements left after the blocks. */
$L25:
and N, 3, I
ble I, $L998
.align 4
$L26:
LD x0, 0 * SIZE(X)
lda I, -1(I)
LD x1, 1 * SIZE(X)
addq X, INCX, X
addt a0, t0, a0
mult x0, x0, t0
addt a1, t1, a1
mult x1, x1, t1
bgt I, $L26
.align 4
/* Final reduction: flush the pending t0/t1 products, then combine the */
/* four partial sums and take the square root. */
$L998:
addt a0, t0, a0
addt a1, t1, a1
addt a0, a1, a0
addt a2, a3, a2
#if defined(EV4) || defined(EV5)
/* Put the total in $f16 and call the external sqrt routine, then */
/* re-establish the GP from the return address. */
addt a0, a2, $f16
jsr $26, ($27), sqrt !lituse_jsr!2
ldah $29, 0($26) !gpdisp!3
lda $29, 0($29) !gpdisp!3
#else
/* Other targets use the sqrtt instruction directly. */
addt a0, a2, a0
sqrtt a0, a0
#endif
.align 4
/* Common exit (also the N <= 0 early exit). */
$L999:
#if defined(EV4) || defined(EV5)
/* Restore the saved return address and release the 16-byte frame. */
ldq $26, 0($sp)
lda $sp, 16($sp)
#endif
ret
EPILOGUE

631
kernel/alpha/zrot.S Normal file
View File

@@ -0,0 +1,631 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Integer registers read on entry before being written: element */
/* count, the two vector pointers and their strides. */
#define N $16
#define X $17
#define INCX $18
#define Y $19
#define INCY $20
/* Loop counter. */
#define I $21
/* Store pointers for the strided path. The same two registers are */
/* also used under their raw names ($23, $24) as scratch flags while */
/* the strides are being tested. */
#define XX $23
#define YY $24
/* Rotation coefficients: C is copied from $f21 and S is loaded from */
/* the stack at the start of the routine. */
#define C $f10
#define S $f11
/* Look-ahead distance, in units of SIZE, of the prefetch-style loads */
/* in the unit-stride main loop. */
#define PREFETCH_SIZE 80
/*
 * Plane rotation applied in place to two complex vectors.
 *
 * For every real and every imaginary component the code computes
 *     x_new = C * x + S * y
 *     y_new = C * y - S * x
 * and stores x_new back to X and y_new back to Y.
 *
 * Inputs (read before being written): N, X, INCX, Y, INCY in integer
 * registers, C in $f21 and S at 0($sp). Both exits clear $0, so the
 * integer return value is always 0. No stack frame is allocated.
 *
 * Temporaries: $f12/$f14 hold the real/imaginary part of x (and
 * $f16/$f18 the next element in the unrolled loop), $f13/$f15 (and
 * $f17/$f19) the same for y; $f21..$f28 hold products and results.
 */
PROLOGUE
PROFCODE
.frame $sp, 0, $26, 0
#ifndef PROFILE
.prologue 0
#else
.prologue 1
#endif
/* Move C out of $f21, which is reused as a product temporary below. */
fmov $f21, C
LD S, 0($sp)
/* Double both strides: each complex element occupies two values. */
addq INCX, INCX, INCX
addq INCY, INCY, INCY
/* A doubled stride of 2 means the original stride was 1. */
cmpeq INCX, 2, $23
cmpeq INCY, 2, $24
/* Nothing to do for N <= 0. */
ble N, $L998
/* Take the fast path only when BOTH vectors are contiguous. */
and $23, $24, $23
beq $23, $L50
/* ---- Contiguous path: 4 complex elements (8 values) per block ---- */
sra N, 2, I
ble I, $L15
/* Preload the first two complex elements of x and y. */
LD $f12, 0*SIZE(X)
LD $f13, 0*SIZE(Y)
LD $f14, 1*SIZE(X)
LD $f15, 1*SIZE(Y)
LD $f16, 2*SIZE(X)
LD $f17, 2*SIZE(Y)
LD $f18, 3*SIZE(X)
LD $f19, 3*SIZE(Y)
/* Prime the pipeline: start the products for values 0 and 1 and */
/* begin loading values 4 and 5, so the loop body can store results */
/* immediately. */
MUL C, $f12, $f21
unop
MUL S, $f13, $f22
MUL C, $f13, $f23
LD $f13, 4*SIZE(Y)
MUL S, $f12, $f24
LD $f12, 4*SIZE(X)
MUL C, $f14, $f25
/* The last block is handled by the drain code at $L13. */
lda I, -1(I)
MUL S, $f15, $f26
ADD $f21, $f22, $f22
MUL C, $f15, $f27
LD $f15, 5*SIZE(Y)
MUL S, $f14, $f28
SUB $f23, $f24, $f24
ble I, $L13
.align 4
/* Main loop, software-pipelined: results of earlier values are stored */
/* while the products of the following values are formed and the */
/* inputs for the next block are loaded. The unop padding is kept */
/* exactly as scheduled. */
$L12:
MUL C, $f16, $f21
/* Loads into $f31 are discarded: these act only as prefetches. */
lds $f31, (PREFETCH_SIZE) * SIZE(X)
unop
LD $f14, 5*SIZE(X)
ST $f22, 0*SIZE(X)
MUL S, $f17, $f22
unop
ADD $f25, $f26, $f26
MUL C, $f17, $f23
lds $f31, (PREFETCH_SIZE) * SIZE(Y)
unop
LD $f17, 6*SIZE(Y)
ST $f24, 0*SIZE(Y)
MUL S, $f16, $f24
unop
SUB $f27, $f28, $f28
MUL C, $f18, $f25
LD $f16, 6*SIZE(X)
unop
unop
ST $f26, 1*SIZE(X)
MUL S, $f19, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f19, $f27
unop
unop
LD $f19, 7*SIZE(Y)
ST $f28, 1*SIZE(Y)
MUL S, $f18, $f28
unop
SUB $f23, $f24, $f24
MUL C, $f12, $f21
LD $f18, 7*SIZE(X)
unop
unop
ST $f22, 2*SIZE(X)
unop
MUL S, $f13, $f22
ADD $f25, $f26, $f26
MUL C, $f13, $f23
/* From here on the loads fetch values of the NEXT block (offsets 8+). */
LD $f13, 8*SIZE(Y)
unop
unop
ST $f24, 2*SIZE(Y)
MUL S, $f12, $f24
unop
SUB $f27, $f28, $f28
MUL C, $f14, $f25
LD $f12, 8*SIZE(X)
unop
unop
ST $f26, 3*SIZE(X)
MUL S, $f15, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f15, $f27
LD $f15, 9*SIZE(Y)
unop
unop
ST $f28, 3*SIZE(Y)
MUL S, $f14, $f28
unop
SUB $f23, $f24, $f24
MUL C, $f16, $f21
LD $f14, 9*SIZE(X)
unop
unop
ST $f22, 4*SIZE(X)
MUL S, $f17, $f22
unop
ADD $f25, $f26, $f26
MUL C, $f17, $f23
LD $f17, 10*SIZE(Y)
unop
unop
ST $f24, 4*SIZE(Y)
MUL S, $f16, $f24
unop
SUB $f27, $f28, $f28
MUL C, $f18, $f25
LD $f16, 10*SIZE(X)
unop
unop
ST $f26, 5*SIZE(X)
MUL S, $f19, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f19, $f27
LD $f19, 11*SIZE(Y)
unop
unop
ST $f28, 5*SIZE(Y)
MUL S, $f18, $f28
lda I, -1(I)
SUB $f23, $f24, $f24
MUL C, $f12, $f21
LD $f18, 11*SIZE(X)
unop
unop
ST $f22, 6*SIZE(X)
MUL S, $f13, $f22
unop
ADD $f25, $f26, $f26
MUL C, $f13, $f23
LD $f13, 12*SIZE(Y)
/* X moves to the next block here; later X offsets in this iteration */
/* are relative to the new base (4 = old 12, -1 = old 7). */
lda X, 8*SIZE(X)
unop
ST $f24, 6*SIZE(Y)
MUL S, $f12, $f24
unop
SUB $f27, $f28, $f28
MUL C, $f14, $f25
LD $f12, 4*SIZE(X)
/* Y moves to the next block here, with the same re-basing of offsets. */
lda Y, 8*SIZE(Y)
unop
ST $f26, -1*SIZE(X)
MUL S, $f15, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f15, $f27
LD $f15, 5*SIZE(Y)
unop
unop
ST $f28, -1*SIZE(Y)
MUL S, $f14, $f28
SUB $f23, $f24, $f24
bgt I, $L12
.align 4
/* Drain for the last block: same arithmetic as the loop, but loads */
/* stop at offset 7 so nothing beyond the block is read. */
$L13:
MUL C, $f16, $f21
LD $f14, 5*SIZE(X)
unop
unop
ST $f22, 0*SIZE(X)
MUL S, $f17, $f22
unop
ADD $f25, $f26, $f26
MUL C, $f17, $f23
unop
unop
LD $f17, 6*SIZE(Y)
ST $f24, 0*SIZE(Y)
MUL S, $f16, $f24
LD $f16, 6*SIZE(X)
SUB $f27, $f28, $f28
MUL C, $f18, $f25
unop
unop
unop
ST $f26, 1*SIZE(X)
MUL S, $f19, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f19, $f27
unop
unop
LD $f19, 7*SIZE(Y)
ST $f28, 1*SIZE(Y)
MUL S, $f18, $f28
LD $f18, 7*SIZE(X)
SUB $f23, $f24, $f24
MUL C, $f12, $f21
unop
unop
unop
ST $f22, 2*SIZE(X)
unop
MUL S, $f13, $f22
ADD $f25, $f26, $f26
MUL C, $f13, $f23
unop
unop
unop
ST $f24, 2*SIZE(Y)
MUL S, $f12, $f24
unop
SUB $f27, $f28, $f28
MUL C, $f14, $f25
unop
unop
unop
ST $f26, 3*SIZE(X)
MUL S, $f15, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f15, $f27
unop
unop
unop
ST $f28, 3*SIZE(Y)
MUL S, $f14, $f28
unop
SUB $f23, $f24, $f24
MUL C, $f16, $f21
unop
unop
unop
ST $f22, 4*SIZE(X)
MUL S, $f17, $f22
unop
ADD $f25, $f26, $f26
MUL C, $f17, $f23
unop
unop
unop
ST $f24, 4*SIZE(Y)
MUL S, $f16, $f24
unop
SUB $f27, $f28, $f28
MUL C, $f18, $f25
unop
unop
unop
ST $f26, 5*SIZE(X)
MUL S, $f19, $f26
unop
ADD $f21, $f22, $f22
MUL C, $f19, $f27
unop
unop
unop
ST $f28, 5*SIZE(Y)
MUL S, $f18, $f28
unop
SUB $f23, $f24, $f24
/* Store the last two values and step both pointers past the block. */
ST $f22, 6*SIZE(X)
ADD $f25, $f26, $f26
ST $f24, 6*SIZE(Y)
SUB $f27, $f28, $f28
ST $f26, 7*SIZE(X)
lda X, 8*SIZE(X)
ST $f28, 7*SIZE(Y)
lda Y, 8*SIZE(Y)
.align 4
/* Contiguous remainder: the N & 3 elements left after the blocks. */
$L15:
and N, 3, I
ble I, $L998
.align 4
/* One complex element per iteration, not pipelined. */
$L16:
LD $f12, 0*SIZE(X)
LD $f13, 0*SIZE(Y)
LD $f14, 1*SIZE(X)
LD $f15, 1*SIZE(Y)
/* Real parts: $f22 = C*x + S*y, $f24 = C*y - S*x. */
MUL C, $f12, $f21
MUL S, $f13, $f22
MUL C, $f13, $f23
MUL S, $f12, $f24
ADD $f21, $f22, $f22
SUB $f23, $f24, $f24
/* Imaginary parts: $f26 = C*x + S*y, $f28 = C*y - S*x. */
MUL C, $f14, $f25
MUL S, $f15, $f26
MUL C, $f15, $f27
MUL S, $f14, $f28
ADD $f25, $f26, $f26
SUB $f27, $f28, $f28
ST $f22, 0*SIZE(X)
ST $f24, 0*SIZE(Y)
lda I, -1(I)
ST $f26, 1*SIZE(X)
lda X, 2 * SIZE(X)
ST $f28, 1*SIZE(Y)
lda Y, 2 * SIZE(Y)
bgt I, $L16
.align 4
/* Exit of the contiguous path (and of the N <= 0 case): return 0. */
$L998:
clr $0
ret
.align 4
/* ---- General-stride path ---- */
/* X/Y are the read pointers and XX/YY the write pointers; all four */
/* are stepped with SXADDQ by the doubled stride. SXADDQ is a macro */
/* defined outside this file - presumably a scaled add that multiplies */
/* the stride by SIZE; confirm against common.h. */
$L50:
mov X, XX
mov Y, YY
sra N, 2, I
ble I, $L55
.align 4
/* Four elements per iteration; the same load/compute/store sequence */
/* is written out four times. */
$L51:
/* Element 1 of 4. */
LD $f12, 0*SIZE(X)
LD $f13, 0*SIZE(Y)
LD $f14, 1*SIZE(X)
SXADDQ INCX, X, X
LD $f15, 1*SIZE(Y)
SXADDQ INCY, Y, Y
MUL C, $f12, $f21
MUL S, $f13, $f22
MUL C, $f13, $f23
MUL S, $f12, $f24
ADD $f21, $f22, $f22
SUB $f23, $f24, $f24
MUL C, $f14, $f25
MUL S, $f15, $f26
MUL C, $f15, $f27
MUL S, $f14, $f28
ADD $f25, $f26, $f26
SUB $f27, $f28, $f28
ST $f22, 0*SIZE(XX)
ST $f24, 0*SIZE(YY)
ST $f26, 1*SIZE(XX)
SXADDQ INCX, XX, XX
ST $f28, 1*SIZE(YY)
SXADDQ INCY, YY, YY
/* Element 2 of 4. */
LD $f12, 0*SIZE(X)
LD $f13, 0*SIZE(Y)
LD $f14, 1*SIZE(X)
SXADDQ INCX, X, X
LD $f15, 1*SIZE(Y)
SXADDQ INCY, Y, Y
MUL C, $f12, $f21
MUL S, $f13, $f22
MUL C, $f13, $f23
MUL S, $f12, $f24
ADD $f21, $f22, $f22
SUB $f23, $f24, $f24
MUL C, $f14, $f25
MUL S, $f15, $f26
MUL C, $f15, $f27
MUL S, $f14, $f28
ADD $f25, $f26, $f26
SUB $f27, $f28, $f28
ST $f22, 0*SIZE(XX)
ST $f24, 0*SIZE(YY)
ST $f26, 1*SIZE(XX)
SXADDQ INCX, XX, XX
ST $f28, 1*SIZE(YY)
SXADDQ INCY, YY, YY
/* Element 3 of 4. */
LD $f12, 0*SIZE(X)
LD $f13, 0*SIZE(Y)
LD $f14, 1*SIZE(X)
SXADDQ INCX, X, X
LD $f15, 1*SIZE(Y)
SXADDQ INCY, Y, Y
MUL C, $f12, $f21
MUL S, $f13, $f22
MUL C, $f13, $f23
MUL S, $f12, $f24
ADD $f21, $f22, $f22
SUB $f23, $f24, $f24
MUL C, $f14, $f25
MUL S, $f15, $f26
MUL C, $f15, $f27
MUL S, $f14, $f28
ADD $f25, $f26, $f26
SUB $f27, $f28, $f28
ST $f22, 0*SIZE(XX)
ST $f24, 0*SIZE(YY)
ST $f26, 1*SIZE(XX)
SXADDQ INCX, XX, XX
ST $f28, 1*SIZE(YY)
SXADDQ INCY, YY, YY
/* Element 4 of 4. */
LD $f12, 0*SIZE(X)
LD $f13, 0*SIZE(Y)
LD $f14, 1*SIZE(X)
SXADDQ INCX, X, X
LD $f15, 1*SIZE(Y)
SXADDQ INCY, Y, Y
MUL C, $f12, $f21
MUL S, $f13, $f22
MUL C, $f13, $f23
MUL S, $f12, $f24
ADD $f21, $f22, $f22
SUB $f23, $f24, $f24
MUL C, $f14, $f25
MUL S, $f15, $f26
MUL C, $f15, $f27
MUL S, $f14, $f28
ADD $f25, $f26, $f26
SUB $f27, $f28, $f28
ST $f22, 0*SIZE(XX)
ST $f24, 0*SIZE(YY)
ST $f26, 1*SIZE(XX)
SXADDQ INCX, XX, XX
ST $f28, 1*SIZE(YY)
SXADDQ INCY, YY, YY
lda I, -1(I)
bgt I, $L51
.align 4
/* Strided remainder: the N & 3 elements left after the blocks. */
$L55:
and N, 3, I
ble I, $L999
.align 4
/* This loop loads and stores through X/Y directly: each element is */
/* fully written back before the pointers move, so the separate */
/* XX/YY write pointers are not needed here. */
$L56:
LD $f12, 0*SIZE(X)
LD $f13, 0*SIZE(Y)
LD $f14, 1*SIZE(X)
LD $f15, 1*SIZE(Y)
MUL C, $f12, $f21
MUL S, $f13, $f22
MUL C, $f13, $f23
MUL S, $f12, $f24
ADD $f21, $f22, $f22
SUB $f23, $f24, $f24
MUL C, $f14, $f25
MUL S, $f15, $f26
MUL C, $f15, $f27
MUL S, $f14, $f28
ADD $f25, $f26, $f26
SUB $f27, $f28, $f28
ST $f22, 0*SIZE(X)
ST $f24, 0*SIZE(Y)
lda I, -1(I)
ST $f26, 1*SIZE(X)
ST $f28, 1*SIZE(Y)
SXADDQ INCX, X, X
SXADDQ INCY, Y, Y
bgt I, $L56
.align 4
/* Exit of the strided path: return 0. */
$L999:
clr $0
ret
EPILOGUE

255
kernel/alpha/zscal.S Normal file
View File

@@ -0,0 +1,255 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Look-ahead distance, in units of SIZE, of the prefetch-style load */
/* issued once per iteration of the main loop. */
#define PREFETCHSIZE 88
/* N and X are read on entry before being written (incoming */
/* arguments). INCX is NOT an incoming register value: it is loaded */
/* from the stack at the start of the routine. */
#define N $16
#define X $21
#define INCX $17
/* Write pointer; starts equal to X and trails it in the main loop. */
#define XX $18
/* Loop counter. */
#define I $19
/* Real and imaginary part of the scale factor, read but never written. */
#define ALPHA_R $f19
#define ALPHA_I $f20
/* s0..s3 are defined here but not referenced by the routine below. */
#define s0 $f0
#define s1 $f1
#define s2 $f10
#define s3 $f11
/* Loaded vector values: (a0,a1), (a2,a3), (a4,a5), (a6,a7) are the */
/* real/imaginary pairs of four consecutive elements. */
#define a0 $f12
#define a1 $f13
#define a2 $f14
#define a3 $f15
#define a4 $f16
#define a5 $f17
#define a6 $f18
#define a7 $f21
/* t0..t3 hold the four partial products of one complex multiply; */
/* (t4,t5) and (t6,t7) alternate as the real/imaginary result pair. */
#define t0 $f22
#define t1 $f23
#define t2 $f24
#define t3 $f25
#define t4 $f26
#define t5 $f27
#define t6 $f28
#define t7 $f29
/*
 * In-place scaling of a complex vector by the complex factor
 * ALPHA_R + i * ALPHA_I. For each element (re, im) the code stores
 *     re_new = re * ALPHA_R - im * ALPHA_I
 *     im_new = re * ALPHA_I + im * ALPHA_R
 *
 * Inputs: N and X in registers, ALPHA_R/ALPHA_I in FP registers, and
 * the stride at 0($sp). X is the read pointer and XX the write
 * pointer; both start at the same address, and in the main loop X runs
 * four elements ahead of XX. One code path serves every stride.
 *
 * The routine returns without writing $0 and returns immediately for
 * N <= 0.
 *
 * SXADDQ is a macro defined outside this file - presumably a scaled
 * add that multiplies the stride by SIZE; confirm against common.h.
 */
PROLOGUE
PROFCODE
ldq INCX, 0($sp)
mov X, XX
ble N, $L999
/* Double the stride: each complex element occupies two values. */
addq INCX, INCX, INCX
sra N, 2, I
/* Fewer than 4 elements: go straight to the one-at-a-time loop. */
ble I, $L15
/* Preload the four elements of the first block; X ends up pointing */
/* at the next block. */
LD a0, 0 * SIZE(X)
LD a1, 1 * SIZE(X)
SXADDQ INCX, X, X
LD a2, 0 * SIZE(X)
LD a3, 1 * SIZE(X)
SXADDQ INCX, X, X
LD a4, 0 * SIZE(X)
LD a5, 1 * SIZE(X)
SXADDQ INCX, X, X
LD a6, 0 * SIZE(X)
LD a7, 1 * SIZE(X)
SXADDQ INCX, X, X
/* Prime the pipeline: finish the first element so the loop body can */
/* begin with its store. */
MUL a0, ALPHA_R, t0
MUL a1, ALPHA_I, t1
MUL a0, ALPHA_I, t2
MUL a1, ALPHA_R, t3
SUB t0, t1, t4
ADD t2, t3, t5
/* The last block is handled by the drain code at $L13. */
lda I, -1(I)
ble I, $L13
.align 4
/* Main loop, software-pipelined: stores the result of the previous */
/* element, multiplies the current one and reloads the register pair */
/* just consumed with the matching element of the next block. */
$L12:
ST t4, 0 * SIZE(XX)
MUL a2, ALPHA_R, t0
ST t5, 1 * SIZE(XX)
MUL a3, ALPHA_I, t1
MUL a2, ALPHA_I, t2
LD a0, 0 * SIZE(X)
MUL a3, ALPHA_R, t3
LD a1, 1 * SIZE(X)
SUB t0, t1, t6
SXADDQ INCX, XX, XX
ADD t2, t3, t7
SXADDQ INCX, X, X
MUL a4, ALPHA_R, t0
ST t6, 0 * SIZE(XX)
MUL a5, ALPHA_I, t1
ST t7, 1 * SIZE(XX)
MUL a4, ALPHA_I, t2
LD a2, 0 * SIZE(X)
MUL a5, ALPHA_R, t3
LD a3, 1 * SIZE(X)
SUB t0, t1, t4
SXADDQ INCX, XX, XX
ADD t2, t3, t5
SXADDQ INCX, X, X
MUL a6, ALPHA_R, t0
ST t4, 0 * SIZE(XX)
MUL a7, ALPHA_I, t1
ST t5, 1 * SIZE(XX)
MUL a6, ALPHA_I, t2
LD a4, 0 * SIZE(X)
MUL a7, ALPHA_R, t3
LD a5, 1 * SIZE(X)
SUB t0, t1, t6
SXADDQ INCX, XX, XX
ADD t2, t3, t7
SXADDQ INCX, X, X
/* First element of the next block: its result (t4,t5) is stored at */
/* the top of the next iteration or of the drain code. */
MUL a0, ALPHA_R, t0
ST t6, 0 * SIZE(XX)
MUL a1, ALPHA_I, t1
ST t7, 1 * SIZE(XX)
MUL a0, ALPHA_I, t2
LD a6, 0 * SIZE(X)
MUL a1, ALPHA_R, t3
LD a7, 1 * SIZE(X)
SUB t0, t1, t4
lda I, -1(I)
ADD t2, t3, t5
SXADDQ INCX, XX, XX
/* Load into $f31 is discarded: this acts only as a prefetch. */
lds $f31, PREFETCHSIZE * SIZE(X)
unop
SXADDQ INCX, X, X
bne I, $L12
.align 4
/* Drain for the last block: all four elements are already in */
/* registers, so only the multiplies and stores remain. */
$L13:
MUL a2, ALPHA_R, t0
MUL a3, ALPHA_I, t1
ST t4, 0 * SIZE(XX)
MUL a2, ALPHA_I, t2
ST t5, 1 * SIZE(XX)
MUL a3, ALPHA_R, t3
SUB t0, t1, t6
SXADDQ INCX, XX, XX
ADD t2, t3, t7
unop
ST t6, 0 * SIZE(XX)
MUL a4, ALPHA_R, t0
ST t7, 1 * SIZE(XX)
MUL a5, ALPHA_I, t1
MUL a4, ALPHA_I, t2
MUL a5, ALPHA_R, t3
SUB t0, t1, t4
SXADDQ INCX, XX, XX
ADD t2, t3, t5
unop
MUL a6, ALPHA_R, t0
ST t4, 0 * SIZE(XX)
MUL a7, ALPHA_I, t1
ST t5, 1 * SIZE(XX)
MUL a6, ALPHA_I, t2
MUL a7, ALPHA_R, t3
SUB t0, t1, t6
SXADDQ INCX, XX, XX
ADD t2, t3, t7
ST t6, 0 * SIZE(XX)
ST t7, 1 * SIZE(XX)
/* XX catches up with X: both now point just past the last block. */
SXADDQ INCX, XX, XX
.align 4
/* Remainder: the N & 3 elements left after the blocks. */
$L15:
and N, 3, I
unop
unop
ble I, $L999
.align 4
/* One element per iteration; X and XX advance in lockstep. */
$L17:
LD a0, 0 * SIZE(X)
LD a1, 1 * SIZE(X)
SXADDQ INCX, X, X
MUL a0, ALPHA_R, t0
MUL a1, ALPHA_I, t1
MUL a0, ALPHA_I, t2
MUL a1, ALPHA_R, t3
SUB t0, t1, t4
ADD t2, t3, t5
ST t4, 0 * SIZE(XX)
ST t5, 1 * SIZE(XX)
SXADDQ INCX, XX, XX
lda I, -1(I)
bne I, $L17
.align 4
$L999:
ret
EPILOGUE

244
kernel/alpha/zswap.S Normal file
View File

@@ -0,0 +1,244 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/*
 * Exchanges the contents of two complex vectors x and y.
 *
 * Inputs as read by the code:
 *   $16       n, number of complex elements
 *   $21       x pointer (copied to $17)
 *   0($sp)    incx, stride of x
 *   8($sp)    y pointer
 *   16($sp)   incy, stride of y
 * Returns 0 in $0. No stack frame is allocated.
 *
 * Working registers:
 *   $17 / $19   read pointers into x / y
 *   $18 / $20   strides, doubled because an element is two values
 *   $21         number of 4-element blocks (n >> 2)
 *   $22         leftover elements (n & 3)
 *   $23 / $24   write pointers into x / y on the strided path
 *   $f10..$f17  values read from y, $f20..$f27 values read from x
 *
 * SXADDQ is a macro defined outside this file - presumably a scaled
 * add that multiplies the stride by SIZE; confirm against common.h.
 */
PROLOGUE
PROFCODE
.frame $sp, 0, $26, 0
mov $21, $17
/* The stack arguments sit in 8-byte slots (the y pointer at 8($sp) */
/* lies between the two strides). Read the strides as full quadwords */
/* so that a stride outside the 32-bit range is not truncated. */
ldq $18, 0($sp)
ldq $19, 8($sp)
ldq $20, 16($sp)
#ifndef PROFILE
.prologue 0
#else
.prologue 1
#endif
/* Nothing to do for n <= 0: return 0. */
ble $16, $SubEnd
/* Test for unit strides BEFORE doubling them. */
cmpeq $18, 1, $1
addq $18, $18, $18
cmpeq $20, 1, $2
addq $20, $20, $20
sra $16, 2, $21
/* $1 = 1 only when both vectors are contiguous. */
and $1, $2, $1
and $16, 3, $22
beq $1, $Sub
ble $21, $MainRemain
.align 4
/* ---- Contiguous path: 4 complex elements (8 values) per block ---- */
/* Every value of the block is loaded from both vectors before any */
/* store is issued. */
$MainLoop:
LD $f10, 0*SIZE($19)
LD $f11, 1*SIZE($19)
LD $f12, 2*SIZE($19)
LD $f13, 3*SIZE($19)
LD $f14, 4*SIZE($19)
LD $f15, 5*SIZE($19)
LD $f16, 6*SIZE($19)
LD $f17, 7*SIZE($19)
LD $f20, 0*SIZE($17)
LD $f21, 1*SIZE($17)
LD $f22, 2*SIZE($17)
LD $f23, 3*SIZE($17)
LD $f24, 4*SIZE($17)
LD $f25, 5*SIZE($17)
LD $f26, 6*SIZE($17)
LD $f27, 7*SIZE($17)
/* Loads into $f31 are discarded: these act only as prefetches. */
lds $f31, 16*SIZE($17)
unop
lds $f31, 16*SIZE($19)
/* 64-bit decrement: the block count is derived from the 64-bit n. */
subq $21, 1, $21
/* Old y values go to x ... */
ST $f10, 0*SIZE($17)
ST $f11, 1*SIZE($17)
ST $f12, 2*SIZE($17)
ST $f13, 3*SIZE($17)
ST $f14, 4*SIZE($17)
ST $f15, 5*SIZE($17)
ST $f16, 6*SIZE($17)
ST $f17, 7*SIZE($17)
/* ... and old x values go to y. */
ST $f20, 0*SIZE($19)
ST $f21, 1*SIZE($19)
ST $f22, 2*SIZE($19)
ST $f23, 3*SIZE($19)
ST $f24, 4*SIZE($19)
ST $f25, 5*SIZE($19)
ST $f26, 6*SIZE($19)
ST $f27, 7*SIZE($19)
lda $17, 8*SIZE($17)
lda $19, 8*SIZE($19)
bgt $21, $MainLoop
.align 4
/* Contiguous remainder: the n & 3 elements left after the blocks. */
$MainRemain:
ble $22, $MainEnd
.align 4
$MainRemainLoop:
LD $f10, 0*SIZE($19)
LD $f11, 1*SIZE($19)
LD $f20, 0*SIZE($17)
LD $f21, 1*SIZE($17)
/* The pointers are advanced before the stores, hence the negative */
/* store offsets below. */
lda $17, 2*SIZE($17)
lda $19, 2*SIZE($19)
subq $22, 1, $22
ST $f10, -2*SIZE($17)
ST $f11, -1*SIZE($17)
ST $f20, -2*SIZE($19)
ST $f21, -1*SIZE($19)
bgt $22, $MainRemainLoop
.align 4
$MainEnd:
clr $0
ret
.align 4
/* ---- General-stride path ---- */
/* $17/$19 read ahead through the block while $23/$24 follow as write */
/* pointers; all four advance by the same stride, so they meet again */
/* at the end of every block. */
$Sub:
mov $17, $23
mov $19, $24
ble $21, $SubRemain
.align 4
$SubLoop:
/* Read four elements of y. */
LD $f10, 0*SIZE($19)
LD $f11, 1*SIZE($19)
SXADDQ $20, $19, $19
LD $f12, 0*SIZE($19)
LD $f13, 1*SIZE($19)
SXADDQ $20, $19, $19
LD $f14, 0*SIZE($19)
LD $f15, 1*SIZE($19)
SXADDQ $20, $19, $19
LD $f16, 0*SIZE($19)
LD $f17, 1*SIZE($19)
SXADDQ $20, $19, $19
/* Read four elements of x. */
LD $f20, 0*SIZE($17)
LD $f21, 1*SIZE($17)
SXADDQ $18, $17, $17
LD $f22, 0*SIZE($17)
LD $f23, 1*SIZE($17)
SXADDQ $18, $17, $17
LD $f24, 0*SIZE($17)
LD $f25, 1*SIZE($17)
SXADDQ $18, $17, $17
LD $f26, 0*SIZE($17)
LD $f27, 1*SIZE($17)
SXADDQ $18, $17, $17
/* Write the old y values into x. */
ST $f10, 0*SIZE($23)
ST $f11, 1*SIZE($23)
SXADDQ $18, $23, $23
ST $f12, 0*SIZE($23)
ST $f13, 1*SIZE($23)
SXADDQ $18, $23, $23
ST $f14, 0*SIZE($23)
ST $f15, 1*SIZE($23)
SXADDQ $18, $23, $23
ST $f16, 0*SIZE($23)
ST $f17, 1*SIZE($23)
SXADDQ $18, $23, $23
/* Write the old x values into y. */
ST $f20, 0*SIZE($24)
ST $f21, 1*SIZE($24)
SXADDQ $20, $24, $24
ST $f22, 0*SIZE($24)
ST $f23, 1*SIZE($24)
SXADDQ $20, $24, $24
ST $f24, 0*SIZE($24)
ST $f25, 1*SIZE($24)
SXADDQ $20, $24, $24
ST $f26, 0*SIZE($24)
ST $f27, 1*SIZE($24)
SXADDQ $20, $24, $24
subq $21, 1, $21
bgt $21, $SubLoop
.align 4
/* Strided remainder: the n & 3 elements left after the blocks. Each */
/* element is fully exchanged before the pointers move, so the read */
/* pointers double as write pointers here. */
$SubRemain:
ble $22, $SubEnd
.align 4
$SubRemainLoop:
LD $f10, 0*SIZE($19)
LD $f11, 1*SIZE($19)
LD $f20, 0*SIZE($17)
LD $f21, 1*SIZE($17)
subq $22, 1, $22
ST $f10, 0*SIZE($17)
ST $f11, 1*SIZE($17)
ST $f20, 0*SIZE($19)
ST $f21, 1*SIZE($19)
SXADDQ $18, $17, $17
SXADDQ $20, $19, $19
bgt $22, $SubRemainLoop
.align 4
/* Exit of the strided path and of the n <= 0 case: return 0. */
$SubEnd:
clr $0
ret
EPILOGUE

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff