diff --git a/.travis.yml b/.travis.yml index 482b4f648..4bfdf485c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -233,6 +233,21 @@ matrix: - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang" - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1" - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1" + + - &test-graviton2 + os: linux + arch: arm64-graviton2 + dist: focal + group: edge + virt: lxd + compiler: gcc + addons: + apt: + packages: + - gfortran + script: + - travis_wait 45 make && make lapack-test + # whitelist branches: only: diff --git a/driver/level3/trmm_L.c b/driver/level3/trmm_L.c index ae8435d03..880de4df4 100644 --- a/driver/level3/trmm_L.c +++ b/driver/level3/trmm_L.c @@ -122,6 +122,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO if (min_l > GEMM_Q) min_l = GEMM_Q; min_i = min_l; if (min_i > GEMM_P) min_i = GEMM_P; + if( min_i > GEMM_UNROLL_M){ + min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M; + } START_RPCC(); @@ -161,9 +164,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO } - for(is = min_i; is < min_l; is += GEMM_P){ + for(is = min_i; is < min_l; is += min_i){ min_i = min_l - is; if (min_i > GEMM_P) min_i = GEMM_P; + if( min_i > GEMM_UNROLL_M){ + min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M; + } START_RPCC(); @@ -192,6 +198,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO if (min_l > GEMM_Q) min_l = GEMM_Q; min_i = ls; if (min_i > GEMM_P) min_i = GEMM_P; + if( min_i > GEMM_UNROLL_M){ + min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M; + } + START_RPCC(); @@ -231,9 +241,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO STOP_RPCC(gemmcost); } - for(is = min_i; is < ls; is += GEMM_P){ + for(is = min_i; is < ls; is += min_i){ min_i = ls - is; if (min_i > GEMM_P) min_i = GEMM_P; + if( min_i > GEMM_UNROLL_M){ + min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M; + } START_RPCC(); @@ -256,9 +269,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO STOP_RPCC(gemmcost); } - for(is = ls; is < ls + min_l; is += GEMM_P){ + for(is = ls; is < ls + min_l; is += min_i){ min_i = ls + min_l - is; if (min_i > GEMM_P) min_i = GEMM_P; + if( min_i > GEMM_UNROLL_M){ + min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M; + } START_RPCC(); @@ -287,6 +303,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO if (min_l > GEMM_Q) min_l = GEMM_Q; min_i = min_l; if (min_i > GEMM_P) min_i = GEMM_P; + if (min_i > GEMM_UNROLL_M){ + min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M; + } + START_RPCC(); @@ -327,9 +347,14 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO STOP_RPCC(trmmcost); } - for(is = m - min_l + min_i; is < m; is += GEMM_P){ + for(is = m - min_l + min_i; is < m; is += min_i){ min_i = m - is; if (min_i > GEMM_P) min_i = GEMM_P; + if (min_i > GEMM_UNROLL_M){ + min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M; + } + + START_RPCC(); @@ -357,6 +382,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO if (min_l > GEMM_Q) min_l = GEMM_Q; min_i = min_l; if (min_i > GEMM_P) min_i = GEMM_P; + if (min_i > GEMM_UNROLL_M){ + min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M; + } + START_RPCC(); @@ -397,9 +426,13 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO STOP_RPCC(trmmcost); } - for(is = ls - min_l + min_i; is < ls; is += GEMM_P){ + for(is = ls - min_l + min_i; is < ls; is += min_i){ min_i = ls - is; if (min_i > GEMM_P) min_i = GEMM_P; + if (min_i > GEMM_UNROLL_M){ + min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M; + } + START_RPCC(); @@ -423,9 +456,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO } - for(is = ls; is < m; is += GEMM_P){ + for(is = ls; is < m; is += min_i){ min_i = m - is; if (min_i > GEMM_P) min_i = GEMM_P; + if (min_i > GEMM_UNROLL_M){ + min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M; + } START_RPCC();