diff --git a/.travis.yml b/.travis.yml index 2b1b99b26..72e29091d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -162,16 +162,16 @@ matrix: before_script: - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" - brew update - - brew install gcc # for gfortran + - brew install gcc@8 # for gfortran script: - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE env: - - BTYPE="BINARY=64 INTERFACE64=1" + - BTYPE="BINARY=64 INTERFACE64=1 FC=gfortran-8" - <<: *test-macos osx_image: xcode8.3 env: - - BTYPE="BINARY=32" + - BTYPE="BINARY=32 FC=gfortran-8" # whitelist branches: diff --git a/common_arm64.h b/common_arm64.h index c6ef2fb5d..c5e6948dc 100644 --- a/common_arm64.h +++ b/common_arm64.h @@ -103,12 +103,14 @@ static inline int blas_quickdivide(blasint x, blasint y){ #if defined(ASSEMBLER) && !defined(NEEDPARAM) -#define PROLOGUE \ - .text ;\ - .align 4 ;\ - .global REALNAME ;\ - .type REALNAME, %function ;\ +.macro PROLOGUE + .text ; + .p2align 2 ; + .global REALNAME ; + .type REALNAME, %function ; REALNAME: +.endm + #define EPILOGUE diff --git a/kernel/arm64/nrm2.S b/kernel/arm64/nrm2.S index e2cbd4def..0e5a8eed1 100644 --- a/kernel/arm64/nrm2.S +++ b/kernel/arm64/nrm2.S @@ -54,37 +54,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if !defined(DOUBLE) ldr s4, [X], #4 fcmp s4, REGZERO - beq KERNEL_F1_NEXT_\@ + beq 2f /* KERNEL_F1_NEXT_\@ */ fabs s4, s4 fcmp SCALE, s4 - bge KERNEL_F1_SCALE_GE_X_\@ + bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */ fdiv s2, SCALE, s4 fmul s2, s2, s2 fmul s3, SSQ, s2 fadd SSQ, REGONE, s3 fmov SCALE, s4 - b KERNEL_F1_NEXT_\@ -KERNEL_F1_SCALE_GE_X_\@: + b 2f /* KERNEL_F1_NEXT_\@ */ +1: /* KERNEL_F1_SCALE_GE_X_\@: */ fdiv s2, s4, SCALE fmla SSQ, s2, v2.s[0] #else ldr d4, [X], #8 fcmp d4, REGZERO - beq KERNEL_F1_NEXT_\@ + beq 2f /* KERNEL_F1_NEXT_\@ */ fabs d4, d4 fcmp SCALE, d4 - bge KERNEL_F1_SCALE_GE_X_\@ + bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */ fdiv d2, SCALE, d4 fmul d2, d2, d2 fmul d3, SSQ, d2 fadd SSQ, REGONE, d3 fmov SCALE, d4 - b KERNEL_F1_NEXT_\@ -KERNEL_F1_SCALE_GE_X_\@: + b 2f /* KERNEL_F1_NEXT_\@ */ +1: /* KERNEL_F1_SCALE_GE_X_\@: */ fdiv d2, d4, SCALE fmla SSQ, d2, v2.d[0] #endif -KERNEL_F1_NEXT_\@: +2: /* KERNEL_F1_NEXT_\@: */ .endm .macro KERNEL_S1 diff --git a/kernel/arm64/znrm2.S b/kernel/arm64/znrm2.S index 1c89685ea..ce3f7d4ed 100644 --- a/kernel/arm64/znrm2.S +++ b/kernel/arm64/znrm2.S @@ -54,69 +54,69 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if !defined(DOUBLE) ldr s4, [X], #4 fcmp s4, REGZERO - beq KERNEL_F1_NEXT_\@ + beq 2f /* KERNEL_F1_NEXT_\@ */ fabs s4, s4 fcmp SCALE, s4 - bge KERNEL_F1_SCALE_GE_XR_\@ + bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */ fdiv s2, SCALE, s4 fmul s2, s2, s2 fmul s3, SSQ, s2 fadd SSQ, REGONE, s3 fmov SCALE, s4 - b KERNEL_F1_NEXT_\@ -KERNEL_F1_SCALE_GE_XR_\@: + b 2f /* KERNEL_F1_NEXT_\@ */ +1: /* KERNEL_F1_SCALE_GE_XR_\@: */ fdiv s2, s4, SCALE fmla SSQ, s2, v2.s[0] -KERNEL_F1_NEXT_\@: +2: /* KERNEL_F1_NEXT_\@: */ ldr s5, [X], #4 fcmp s5, REGZERO - beq KERNEL_F1_END_\@ + beq 4f /* KERNEL_F1_END_\@ */ fabs s5, s5 fcmp SCALE, s5 - bge KERNEL_F1_SCALE_GE_XI_\@ + bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */ fdiv s2, SCALE, s5 fmul s2, s2, s2 fmul s3, SSQ, s2 fadd SSQ, REGONE, s3 fmov SCALE, s5 - b KERNEL_F1_END_\@ -KERNEL_F1_SCALE_GE_XI_\@: + b 4f /* KERNEL_F1_END_\@ */ +3: /* KERNEL_F1_SCALE_GE_XI_\@: */ fdiv s2, s5, SCALE fmla SSQ, s2, v2.s[0] #else ldr d4, [X], #8 fcmp d4, REGZERO - beq KERNEL_F1_NEXT_\@ + beq 2f /* KERNEL_F1_NEXT_\@ */ fabs d4, d4 fcmp SCALE, d4 - bge KERNEL_F1_SCALE_GE_XR_\@ + bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */ fdiv d2, SCALE, d4 fmul d2, d2, d2 fmul d3, SSQ, d2 fadd SSQ, REGONE, d3 fmov SCALE, d4 - b KERNEL_F1_NEXT_\@ -KERNEL_F1_SCALE_GE_XR_\@: + b 2f /* KERNEL_F1_NEXT_\@ */ +1: /* KERNEL_F1_SCALE_GE_XR_\@: */ fdiv d2, d4, SCALE fmla SSQ, d2, v2.d[0] -KERNEL_F1_NEXT_\@: +2: /* KERNEL_F1_NEXT_\@: */ ldr d5, [X], #8 fcmp d5, REGZERO - beq KERNEL_F1_END_\@ + beq 4f /* KERNEL_F1_END_\@ */ fabs d5, d5 fcmp SCALE, d5 - bge KERNEL_F1_SCALE_GE_XI_\@ + bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */ fdiv d2, SCALE, d5 fmul d2, d2, d2 fmul d3, SSQ, d2 fadd SSQ, REGONE, d3 fmov SCALE, d5 - b KERNEL_F1_END_\@ -KERNEL_F1_SCALE_GE_XI_\@: + b 4f /* KERNEL_F1_END_\@ */ +3: /* KERNEL_F1_SCALE_GE_XI_\@: */ fdiv d2, d5, SCALE fmla SSQ, d2, v2.d[0] #endif -KERNEL_F1_END_\@: +4: /* KERNEL_F1_END_\@: */ .endm .macro KERNEL_S1 diff --git a/kernel/power/caxpy_power8.S b/kernel/power/caxpy_power8.S index 09a423571..0ce61ca3b 100644 --- a/kernel/power/caxpy_power8.S +++ b/kernel/power/caxpy_power8.S @@ -34,9 +34,9 @@ caxpy_k: lfs 0,4(10) fmuls 10,2,10 #ifdef CONJ - fmsubs 11,11,1,10 -#else fmadds 11,11,1,10 +#else + fmsubs 11,11,1,10 #endif fadds 12,12,11 stfs 12,0(10) @@ -241,8 +241,13 @@ caxpy_k: lfsx 12,8,5 lfsx 0,10,5 fmuls 11,2,11 +#ifdef CONJ fmsubs 12,1,12,11 fsubs 0,0,12 +#else + fmadds 12,1,12,11 + fadds 0,0,12 +#endif stfsx 0,10,5 ble 7,.L39 sldi 6,6,2 diff --git a/kernel/power/cdot_power9.S b/kernel/power/cdot_power9.S index 01d194c0c..9ec7cdd85 100644 --- a/kernel/power/cdot_power9.S +++ b/kernel/power/cdot_power9.S @@ -1,10 +1,16 @@ - .file "cdot.c" +#define ASSEMBLER +#include "common.h" +/* +.file "cdot.c" .abiversion 2 .section ".text" .align 2 .p2align 4,,15 .globl cdot_k .type cdot_k, @function +*/ + PROLOGUE + cdot_k: .LCF0: 0: addis 2,12,.TOC.-.LCF0@ha diff --git a/kernel/power/sgemm_logic_power9.S b/kernel/power/sgemm_logic_power9.S index 053836cbf..a34ed32b8 100644 --- a/kernel/power/sgemm_logic_power9.S +++ b/kernel/power/sgemm_logic_power9.S @@ -136,8 +136,8 @@ LSGEMM_L8x16_BEGIN: #endif ZERO8x16 - mtctr L ble LSGEMM_L8x16_SUB0 + mtctr L bl LSGEMM_L8x16_LMAIN_SUB andi. L, T12, 127 ble LSGEMM_L8x16_SAVE @@ -146,7 +146,7 @@ LSGEMM_L8x16_BEGIN: LSGEMM_L8x16_SUB0: #if defined(TRMMKERNEL) andi. L, T11, 255 - cmpwi T11,128 + cmpwi T11,129 #else andi. L, K, 255 cmpwi K,129