Merge pull request #11 from xianyi/develop

sync with upstream
This commit is contained in:
Martin Kroeker 2019-10-08 08:32:52 +02:00 committed by GitHub
commit 17609f88f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 54 additions and 41 deletions

View File

@ -162,16 +162,16 @@ matrix:
before_script: before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
- brew update - brew update
- brew install gcc # for gfortran - brew install gcc@8 # for gfortran
script: script:
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
env: env:
- BTYPE="BINARY=64 INTERFACE64=1" - BTYPE="BINARY=64 INTERFACE64=1 FC=gfortran-8"
- <<: *test-macos - <<: *test-macos
osx_image: xcode8.3 osx_image: xcode8.3
env: env:
- BTYPE="BINARY=32" - BTYPE="BINARY=32 FC=gfortran-8"
# whitelist # whitelist
branches: branches:

View File

@ -103,12 +103,14 @@ static inline int blas_quickdivide(blasint x, blasint y){
#if defined(ASSEMBLER) && !defined(NEEDPARAM) #if defined(ASSEMBLER) && !defined(NEEDPARAM)
#define PROLOGUE \ .macro PROLOGUE
.text ;\ .text ;
.align 4 ;\ .p2align 2 ;
.global REALNAME ;\ .global REALNAME ;
.type REALNAME, %function ;\ .type REALNAME, %function ;
REALNAME: REALNAME:
.endm
#define EPILOGUE #define EPILOGUE

View File

@ -54,37 +54,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(DOUBLE) #if !defined(DOUBLE)
ldr s4, [X], #4 ldr s4, [X], #4
fcmp s4, REGZERO fcmp s4, REGZERO
beq KERNEL_F1_NEXT_\@ beq 2f /* KERNEL_F1_NEXT_\@ */
fabs s4, s4 fabs s4, s4
fcmp SCALE, s4 fcmp SCALE, s4
bge KERNEL_F1_SCALE_GE_X_\@ bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */
fdiv s2, SCALE, s4 fdiv s2, SCALE, s4
fmul s2, s2, s2 fmul s2, s2, s2
fmul s3, SSQ, s2 fmul s3, SSQ, s2
fadd SSQ, REGONE, s3 fadd SSQ, REGONE, s3
fmov SCALE, s4 fmov SCALE, s4
b KERNEL_F1_NEXT_\@ b 2f /* KERNEL_F1_NEXT_\@ */
KERNEL_F1_SCALE_GE_X_\@: 1: /* KERNEL_F1_SCALE_GE_X_\@: */
fdiv s2, s4, SCALE fdiv s2, s4, SCALE
fmla SSQ, s2, v2.s[0] fmla SSQ, s2, v2.s[0]
#else #else
ldr d4, [X], #8 ldr d4, [X], #8
fcmp d4, REGZERO fcmp d4, REGZERO
beq KERNEL_F1_NEXT_\@ beq 2f /* KERNEL_F1_NEXT_\@ */
fabs d4, d4 fabs d4, d4
fcmp SCALE, d4 fcmp SCALE, d4
bge KERNEL_F1_SCALE_GE_X_\@ bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */
fdiv d2, SCALE, d4 fdiv d2, SCALE, d4
fmul d2, d2, d2 fmul d2, d2, d2
fmul d3, SSQ, d2 fmul d3, SSQ, d2
fadd SSQ, REGONE, d3 fadd SSQ, REGONE, d3
fmov SCALE, d4 fmov SCALE, d4
b KERNEL_F1_NEXT_\@ b 2f /* KERNEL_F1_NEXT_\@ */
KERNEL_F1_SCALE_GE_X_\@: 1: /* KERNEL_F1_SCALE_GE_X_\@: */
fdiv d2, d4, SCALE fdiv d2, d4, SCALE
fmla SSQ, d2, v2.d[0] fmla SSQ, d2, v2.d[0]
#endif #endif
KERNEL_F1_NEXT_\@: 2: /* KERNEL_F1_NEXT_\@: */
.endm .endm
.macro KERNEL_S1 .macro KERNEL_S1

View File

@ -54,69 +54,69 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(DOUBLE) #if !defined(DOUBLE)
ldr s4, [X], #4 ldr s4, [X], #4
fcmp s4, REGZERO fcmp s4, REGZERO
beq KERNEL_F1_NEXT_\@ beq 2f /* KERNEL_F1_NEXT_\@ */
fabs s4, s4 fabs s4, s4
fcmp SCALE, s4 fcmp SCALE, s4
bge KERNEL_F1_SCALE_GE_XR_\@ bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */
fdiv s2, SCALE, s4 fdiv s2, SCALE, s4
fmul s2, s2, s2 fmul s2, s2, s2
fmul s3, SSQ, s2 fmul s3, SSQ, s2
fadd SSQ, REGONE, s3 fadd SSQ, REGONE, s3
fmov SCALE, s4 fmov SCALE, s4
b KERNEL_F1_NEXT_\@ b 2f /* KERNEL_F1_NEXT_\@ */
KERNEL_F1_SCALE_GE_XR_\@: 1: /* KERNEL_F1_SCALE_GE_XR_\@: */
fdiv s2, s4, SCALE fdiv s2, s4, SCALE
fmla SSQ, s2, v2.s[0] fmla SSQ, s2, v2.s[0]
KERNEL_F1_NEXT_\@: 2: /* KERNEL_F1_NEXT_\@: */
ldr s5, [X], #4 ldr s5, [X], #4
fcmp s5, REGZERO fcmp s5, REGZERO
beq KERNEL_F1_END_\@ beq 4f /* KERNEL_F1_END_\@ */
fabs s5, s5 fabs s5, s5
fcmp SCALE, s5 fcmp SCALE, s5
bge KERNEL_F1_SCALE_GE_XI_\@ bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */
fdiv s2, SCALE, s5 fdiv s2, SCALE, s5
fmul s2, s2, s2 fmul s2, s2, s2
fmul s3, SSQ, s2 fmul s3, SSQ, s2
fadd SSQ, REGONE, s3 fadd SSQ, REGONE, s3
fmov SCALE, s5 fmov SCALE, s5
b KERNEL_F1_END_\@ b 4f /* KERNEL_F1_END_\@ */
KERNEL_F1_SCALE_GE_XI_\@: 3: /* KERNEL_F1_SCALE_GE_XI_\@: */
fdiv s2, s5, SCALE fdiv s2, s5, SCALE
fmla SSQ, s2, v2.s[0] fmla SSQ, s2, v2.s[0]
#else #else
ldr d4, [X], #8 ldr d4, [X], #8
fcmp d4, REGZERO fcmp d4, REGZERO
beq KERNEL_F1_NEXT_\@ beq 2f /* KERNEL_F1_NEXT_\@ */
fabs d4, d4 fabs d4, d4
fcmp SCALE, d4 fcmp SCALE, d4
bge KERNEL_F1_SCALE_GE_XR_\@ bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */
fdiv d2, SCALE, d4 fdiv d2, SCALE, d4
fmul d2, d2, d2 fmul d2, d2, d2
fmul d3, SSQ, d2 fmul d3, SSQ, d2
fadd SSQ, REGONE, d3 fadd SSQ, REGONE, d3
fmov SCALE, d4 fmov SCALE, d4
b KERNEL_F1_NEXT_\@ b 2f /* KERNEL_F1_NEXT_\@ */
KERNEL_F1_SCALE_GE_XR_\@: 1: /* KERNEL_F1_SCALE_GE_XR_\@: */
fdiv d2, d4, SCALE fdiv d2, d4, SCALE
fmla SSQ, d2, v2.d[0] fmla SSQ, d2, v2.d[0]
KERNEL_F1_NEXT_\@: 2: /* KERNEL_F1_NEXT_\@: */
ldr d5, [X], #8 ldr d5, [X], #8
fcmp d5, REGZERO fcmp d5, REGZERO
beq KERNEL_F1_END_\@ beq 4f /* KERNEL_F1_END_\@ */
fabs d5, d5 fabs d5, d5
fcmp SCALE, d5 fcmp SCALE, d5
bge KERNEL_F1_SCALE_GE_XI_\@ bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */
fdiv d2, SCALE, d5 fdiv d2, SCALE, d5
fmul d2, d2, d2 fmul d2, d2, d2
fmul d3, SSQ, d2 fmul d3, SSQ, d2
fadd SSQ, REGONE, d3 fadd SSQ, REGONE, d3
fmov SCALE, d5 fmov SCALE, d5
b KERNEL_F1_END_\@ b 4f /* KERNEL_F1_END_\@ */
KERNEL_F1_SCALE_GE_XI_\@: 3: /* KERNEL_F1_SCALE_GE_XI_\@: */
fdiv d2, d5, SCALE fdiv d2, d5, SCALE
fmla SSQ, d2, v2.d[0] fmla SSQ, d2, v2.d[0]
#endif #endif
KERNEL_F1_END_\@: 4: /* KERNEL_F1_END_\@: */
.endm .endm
.macro KERNEL_S1 .macro KERNEL_S1

View File

@ -34,9 +34,9 @@ caxpy_k:
lfs 0,4(10) lfs 0,4(10)
fmuls 10,2,10 fmuls 10,2,10
#ifdef CONJ #ifdef CONJ
fmsubs 11,11,1,10
#else
fmadds 11,11,1,10 fmadds 11,11,1,10
#else
fmsubs 11,11,1,10
#endif #endif
fadds 12,12,11 fadds 12,12,11
stfs 12,0(10) stfs 12,0(10)
@ -241,8 +241,13 @@ caxpy_k:
lfsx 12,8,5 lfsx 12,8,5
lfsx 0,10,5 lfsx 0,10,5
fmuls 11,2,11 fmuls 11,2,11
#ifdef CONJ
fmsubs 12,1,12,11 fmsubs 12,1,12,11
fsubs 0,0,12 fsubs 0,0,12
#else
fmadds 12,1,12,11
fadds 0,0,12
#endif
stfsx 0,10,5 stfsx 0,10,5
ble 7,.L39 ble 7,.L39
sldi 6,6,2 sldi 6,6,2

View File

@ -1,3 +1,6 @@
#define ASSEMBLER
#include "common.h"
/*
.file "cdot.c" .file "cdot.c"
.abiversion 2 .abiversion 2
.section ".text" .section ".text"
@ -5,6 +8,9 @@
.p2align 4,,15 .p2align 4,,15
.globl cdot_k .globl cdot_k
.type cdot_k, @function .type cdot_k, @function
*/
PROLOGUE
cdot_k: cdot_k:
.LCF0: .LCF0:
0: addis 2,12,.TOC.-.LCF0@ha 0: addis 2,12,.TOC.-.LCF0@ha

View File

@ -136,8 +136,8 @@ LSGEMM_L8x16_BEGIN:
#endif #endif
ZERO8x16 ZERO8x16
mtctr L
ble LSGEMM_L8x16_SUB0 ble LSGEMM_L8x16_SUB0
mtctr L
bl LSGEMM_L8x16_LMAIN_SUB bl LSGEMM_L8x16_LMAIN_SUB
andi. L, T12, 127 andi. L, T12, 127
ble LSGEMM_L8x16_SAVE ble LSGEMM_L8x16_SAVE
@ -146,7 +146,7 @@ LSGEMM_L8x16_BEGIN:
LSGEMM_L8x16_SUB0: LSGEMM_L8x16_SUB0:
#if defined(TRMMKERNEL) #if defined(TRMMKERNEL)
andi. L, T11, 255 andi. L, T11, 255
cmpwi T11,128 cmpwi T11,129
#else #else
andi. L, K, 255 andi. L, K, 255
cmpwi K,129 cmpwi K,129