diff --git a/Makefile.zarch b/Makefile.zarch index be1e34f6d..b841d9b4d 100644 --- a/Makefile.zarch +++ b/Makefile.zarch @@ -8,3 +8,9 @@ ifeq ($(CORE), Z14) CCOMMON_OPT += -march=z14 -mzvector -O3 FCOMMON_OPT += -march=z14 -mzvector endif + +# Enable floating-point expression contraction for clang, since it is the +# default for gcc +ifeq ($(C_COMPILER), CLANG) +CCOMMON_OPT += -ffp-contract=fast +endif diff --git a/c_check b/c_check index 314c2b157..5ea93b75c 100644 --- a/c_check +++ b/c_check @@ -8,7 +8,7 @@ $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); $hostarch = `uname -p` if ($hostos eq "AIX"); $hostarch = "x86_64" if ($hostarch eq "amd64"); -$hostarch = "arm" if ($hostarch =~ /^arm.*/); +$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/); $hostarch = "arm64" if ($hostarch eq "aarch64"); $hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/); $hostarch = "zarch" if ($hostarch eq "s390x"); diff --git a/cmake/system.cmake b/cmake/system.cmake index e3617c4e2..c0f3c6ed2 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -110,6 +110,11 @@ if (NO_AVX2) set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2") endif () +if (NO_AVX512) + message(STATUS "Disabling Advanced Vector Extensions 512 (AVX512).") + set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX512") +endif () + if (CMAKE_BUILD_TYPE STREQUAL "Debug") set(GETARCH_FLAGS "${GETARCH_FLAGS} ${CMAKE_C_FLAGS_DEBUG}") endif () diff --git a/cmake/system_check.cmake b/cmake/system_check.cmake index 511a7c7d1..d06f4779f 100644 --- a/cmake/system_check.cmake +++ b/cmake/system_check.cmake @@ -109,6 +109,7 @@ else() endif() if (X86_64 OR X86) +if (NOT NO_AVX512) file(WRITE ${PROJECT_BINARY_DIR}/avx512.c "#include <immintrin.h>\n\nint main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }") execute_process(COMMAND ${CMAKE_C_COMPILER} -march=skylake-avx512 -c -v -o ${PROJECT_BINARY_DIR}/avx512.o ${PROJECT_BINARY_DIR}/avx512.c 
OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_AVX512) if (NO_AVX512 EQUAL 1) @@ -116,6 +117,7 @@ set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512") endif() file(REMOVE "avx512.c" "avx512.o") endif() +endif() include(CheckIncludeFile) CHECK_INCLUDE_FILE("stdatomic.h" HAVE_C11) diff --git a/kernel/zarch/camax.c b/kernel/zarch/camax.c index b10ca4752..018a9a9c0 100644 --- a/kernel/zarch/camax.c +++ b/kernel/zarch/camax.c @@ -136,7 +136,7 @@ static FLOAT camax_kernel_32(BLASLONG n, FLOAT *x) { "wfmaxsb %%v0,%%v0,%%v16,0\n\t" "ler %[amax],%%f0" : [amax] "=f"(amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/camin.c b/kernel/zarch/camin.c index 40945fae8..7b3b36630 100644 --- a/kernel/zarch/camin.c +++ b/kernel/zarch/camin.c @@ -136,7 +136,7 @@ static FLOAT camin_kernel_32(BLASLONG n, FLOAT *x) { "wfminsb %%v0,%%v0,%%v16,0\n\t" "ler %[amin],%%f0" : [amin] "=f"(amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/casum.c b/kernel/zarch/casum.c index e28f2018c..f3b9ed628 100644 --- a/kernel/zarch/casum.c +++ b/kernel/zarch/casum.c @@ -108,7 +108,7 @@ static FLOAT casum_kernel_32(BLASLONG n, FLOAT *x) { "vfasb %%v24,%%v24,%%v25\n\t" "vstef %%v24,%[asum],0" : [asum] "=Q"(asum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/caxpy.c b/kernel/zarch/caxpy.c index 
14a124ae2..c0a7a71f4 100644 --- a/kernel/zarch/caxpy.c +++ b/kernel/zarch/caxpy.c @@ -99,9 +99,9 @@ static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) { "vst %%v19,112(%%r1,%[y])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/ccopy.c b/kernel/zarch/ccopy.c index 0a5e03992..9e08edc3b 100644 --- a/kernel/zarch/ccopy.c +++ b/kernel/zarch/ccopy.c @@ -36,9 +36,9 @@ static void ccopy_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y) { "la %[x],256(%[x])\n\t" "la %[y],256(%[y])\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n * 2]; } *) y),[x] "+&a"(x),[y] "+&a"(y), + : "=m"(*(FLOAT (*)[n * 2]) y),[x] "+&a"(x),[y] "+&a"(y), [n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x) + : "m"(*(const FLOAT (*)[n * 2]) x) : "cc"); } diff --git a/kernel/zarch/cdot.c b/kernel/zarch/cdot.c index d90f9c871..0d6dfbeb1 100644 --- a/kernel/zarch/cdot.c +++ b/kernel/zarch/cdot.c @@ -97,9 +97,9 @@ static void cdot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d) { "vstef %%v24,4(%[d]),1\n\t" "vstef %%v25,8(%[d]),1\n\t" "vstef %%v25,12(%[d]),0" - : "=m"(*(struct { FLOAT x[4]; } *) d),[n] "+&r"(n) - : [d] "a"(d), "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[n * 2]; } *) y),[y] "a"(y) + : "=m"(*(FLOAT (*)[4]) d),[n] "+&r"(n) + : [d] "a"(d), "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[n * 2]) y),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", 
"v3", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/cgemv_n_4.c b/kernel/zarch/cgemv_n_4.c index 5c36bc338..5fdf7717e 100644 --- a/kernel/zarch/cgemv_n_4.c +++ b/kernel/zarch/cgemv_n_4.c @@ -146,12 +146,12 @@ static void cgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vst %%v0,0(%%r1,%[y])\n\t" "agfi %%r1,16\n\t" "brctg %[n],0b\n\t" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[8]; } *) x),[x] "a"(x) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n * 2]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n * 2]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[8]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -238,10 +238,10 @@ static void cgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vst %%v0,0(%%r1,%[y])\n\t" "agfi %%r1,16\n\t" "brctg %[n],0b\n\t" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[4]; } *) x),[x] "a"(x) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[4]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20", "v21", "v22", 
"v23"); } @@ -307,9 +307,9 @@ static void cgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y) { "vst %%v0,0(%%r1,%[y])\n\t" "agfi %%r1,16\n\t" "brctg %[n],0b\n\t" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap),[ap] "a"(ap), - "m"(*(const struct { FLOAT x[2]; } *) x),[x] "a"(x) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap),[ap] "a"(ap), + "m"(*(const FLOAT (*)[2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v16", "v17", "v18", "v19"); } @@ -350,8 +350,8 @@ static void add_y_4(BLASLONG n, FLOAT *src, FLOAT *dest, FLOAT alpha_r, "vst %%v23,16(%%r1,%[dest])\n\t" "agfi %%r1,32\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) dest),[n] "+&r"(n) - : [dest] "a"(dest), "m"(*(const struct { FLOAT x[n * 2]; } *) src), + : "+m"(*(FLOAT (*)[n * 2]) dest),[n] "+&r"(n) + : [dest] "a"(dest), "m"(*(const FLOAT (*)[n * 2]) src), [src] "a"(src),[alpha_r] "Q"(alpha_r),[alpha_i] "Q"(alpha_i) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); diff --git a/kernel/zarch/cgemv_t_4.c b/kernel/zarch/cgemv_t_4.c index e10edfab0..2bdac9ea1 100644 --- a/kernel/zarch/cgemv_t_4.c +++ b/kernel/zarch/cgemv_t_4.c @@ -159,13 +159,13 @@ static void cgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "vfmasb %%v23,%%v19,%%v21,%%v23\n\t" "vst %%v22,0(%[y])\n\t" "vst %%v23,16(%[y])" - : "+m"(*(struct { FLOAT x[8]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[8]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] 
"a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n * 2]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n * 2]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -271,11 +271,11 @@ static void cgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "vfmasb %%v20,%%v16,%%v18,%%v20\n\t" "vfmasb %%v20,%%v17,%%v19,%%v20\n\t" "vst %%v20,0(%[y])" - : "+m"(*(struct { FLOAT x[4]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[4]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); } @@ -361,10 +361,10 @@ static void cgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, "vfmasb %%v0,%%v16,%%v18,%%v0\n\t" "vfmasb %%v0,%%v17,%%v19,%%v0\n\t" "vsteg %%v0,0(%[y]),0" - : "+m"(*(struct { FLOAT x[2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap),[ap] "a"(ap), - "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap),[ap] "a"(ap), + "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v2", "v3", "v16", "v17", "v18", 
"v19"); } diff --git a/kernel/zarch/crot.c b/kernel/zarch/crot.c index aab155f8b..5a0990f3d 100644 --- a/kernel/zarch/crot.c +++ b/kernel/zarch/crot.c @@ -169,8 +169,8 @@ static void crot_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *c, FLOAT *s) { "vst %%v23, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x), - "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) + : "+m"(*(FLOAT (*)[n * 2]) x), + "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) : [x] "a"(x),[y] "a"(y),[c] "Q"(*c),[s] "Q"(*s) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", diff --git a/kernel/zarch/cscal.c b/kernel/zarch/cscal.c index 9fc54cf29..f9e89a452 100644 --- a/kernel/zarch/cscal.c +++ b/kernel/zarch/cscal.c @@ -80,8 +80,8 @@ static void cscal_kernel_16(BLASLONG n, FLOAT *alpha, FLOAT *x) { "vst %%v23,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) - : [x] "a"(x), "m"(*(const struct { FLOAT x[2]; } *) alpha), + : "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) + : [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha), [alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", @@ -132,8 +132,8 @@ static void cscal_kernel_16_zero_r(BLASLONG n, FLOAT *alpha, FLOAT *x) { "vst %%v23,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) - : [x] "a"(x), "m"(*(const struct { FLOAT x[2]; } *) alpha), + : "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) + : [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha), [alpha] "a"(alpha) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); @@ -171,8 +171,8 @@ static void cscal_kernel_16_zero_i(BLASLONG n, FLOAT *alpha, FLOAT *x) { "vst %%v23,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 
2]; } *) x),[n] "+&r"(n) - : [x] "a"(x), "m"(*(const struct { FLOAT x[2]; } *) alpha), + : "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) + : [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha), [alpha] "a"(alpha) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); @@ -194,7 +194,7 @@ static void cscal_kernel_16_zero(BLASLONG n, FLOAT *x) { "vst %%v0,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) + : "=m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) : [x] "a"(x) : "cc", "r1", "v0"); } diff --git a/kernel/zarch/csum.c b/kernel/zarch/csum.c index e9413da8e..b076501aa 100644 --- a/kernel/zarch/csum.c +++ b/kernel/zarch/csum.c @@ -90,7 +90,7 @@ static FLOAT csum_kernel_32(BLASLONG n, FLOAT *x) { "vfasb %%v24,%%v24,%%v25\n\t" "vstef %%v24,%[sum],0" : [sum] "=Q"(sum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/cswap.c b/kernel/zarch/cswap.c index 198994e18..f3ab77ab5 100644 --- a/kernel/zarch/cswap.c +++ b/kernel/zarch/cswap.c @@ -99,8 +99,8 @@ static void cswap_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y) { "vst %%v31, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x), - "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) + : "+m"(*(FLOAT (*)[n * 2]) x), + "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) : [x] "a"(x),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", diff --git a/kernel/zarch/ctrmm4x4V.S b/kernel/zarch/ctrmm4x4V.S index c0e4df17d..123f2ead0 100644 --- a/kernel/zarch/ctrmm4x4V.S +++ b/kernel/zarch/ctrmm4x4V.S @@ -198,7 +198,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,4 nill LOCAL_VAR1,3 #else - la LOCAL_VAR1,3(0,0) + lghi 
LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x4_BK_Store @@ -254,7 +254,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x4_BK_Store @@ -305,7 +305,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x4_BK_Store @@ -385,7 +385,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x2_BK_Store @@ -442,7 +442,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x2_BK_Store @@ -492,7 +492,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x2_BK_Store @@ -568,7 +568,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x1_BK_Store @@ -620,7 +620,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x1_BK_Store @@ -670,7 +670,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x1_BK_Store diff --git a/kernel/zarch/damax.c b/kernel/zarch/damax.c index caacb50dc..d19181cbe 100644 --- a/kernel/zarch/damax.c +++ b/kernel/zarch/damax.c @@ -76,7 +76,7 @@ static FLOAT damax_kernel_32(BLASLONG n, FLOAT *x) { "wfmaxdb %%v0,%%v0,%%v16,8\n\t" "lpdr %[amax],%%f0" : [amax] "=f"(amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", 
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/damax_z13.c b/kernel/zarch/damax_z13.c index f3db4c108..5bc0d1721 100644 --- a/kernel/zarch/damax_z13.c +++ b/kernel/zarch/damax_z13.c @@ -110,7 +110,7 @@ static FLOAT damax_kernel_32(BLASLONG n, FLOAT *x) { "vsel %%v0,%%v0,%%v16,%%v17\n\t" "ldr %[amax],%%f0" : [amax] "=f"(amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/damin.c b/kernel/zarch/damin.c index 0163a144b..4e0558af4 100644 --- a/kernel/zarch/damin.c +++ b/kernel/zarch/damin.c @@ -76,7 +76,7 @@ static FLOAT damin_kernel_32(BLASLONG n, FLOAT *x) { "wfmindb %%v0,%%v0,%%v16,8\n\t" "lpdr %[amin],%%f0" : [amin] "=f"(amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/damin_z13.c b/kernel/zarch/damin_z13.c index 4196b2e15..a7efd4b26 100644 --- a/kernel/zarch/damin_z13.c +++ b/kernel/zarch/damin_z13.c @@ -110,7 +110,7 @@ static FLOAT damin_kernel_32(BLASLONG n, FLOAT *x) { "vsel %%v0,%%v0,%%v16,%%v17\n\t" "ldr %[amin],%%f0" : [amin] "=f"(amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dasum.c b/kernel/zarch/dasum.c index aa1382b10..9703cd3be 100644 --- a/kernel/zarch/dasum.c +++ b/kernel/zarch/dasum.c @@ -106,7 +106,7 @@ static FLOAT dasum_kernel_32(BLASLONG n, FLOAT *x) { "vfadb %%v24,%%v24,%%v25\n\t" "vsteg 
%%v24,%[asum],0" : [asum] "=Q"(asum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/daxpy.c b/kernel/zarch/daxpy.c index 5b0208c20..4e59ef7c6 100644 --- a/kernel/zarch/daxpy.c +++ b/kernel/zarch/daxpy.c @@ -100,8 +100,8 @@ static void daxpy_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) { "vst %%v27,240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x), + : "+m"(*(FLOAT (*)[n]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) x),[x] "a"(x), [alpha] "Q"(*alpha) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dcopy.c b/kernel/zarch/dcopy.c index 691b90c64..3c546568f 100644 --- a/kernel/zarch/dcopy.c +++ b/kernel/zarch/dcopy.c @@ -36,8 +36,8 @@ static void dcopy_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y) { "la %[x],256(%[x])\n\t" "la %[y],256(%[y])\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n]; } *) y),[x] "+&a"(x),[y] "+&a"(y),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x) + : "=m"(*(FLOAT (*)[n]) y),[x] "+&a"(x),[y] "+&a"(y),[n] "+&r"(n) + : "m"(*(const FLOAT (*)[n]) x) : "cc"); } diff --git a/kernel/zarch/ddot.c b/kernel/zarch/ddot.c index 9cad68f4b..c0ed8b72e 100644 --- a/kernel/zarch/ddot.c +++ b/kernel/zarch/ddot.c @@ -80,8 +80,8 @@ static FLOAT ddot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y) { "adbr %%f0,%%f1\n\t" "ldr %[dot],%%f0" : [dot] "=f"(dot),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[n]; } *) y),[y] "a"(y) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[n]) y),[y] "a"(y) : "cc", "r1", "v0", "v1", 
"v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dgemv_n_4.c b/kernel/zarch/dgemv_n_4.c index 502ba837e..e1c5c4472 100644 --- a/kernel/zarch/dgemv_n_4.c +++ b/kernel/zarch/dgemv_n_4.c @@ -169,13 +169,13 @@ static void dgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "agfi %%r1,32\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" - : "+m"(*(struct { FLOAT x[n]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[4]; } *) x),[x] "a"(x),[alpha] "Q"(*alpha), + "nop 0" + : "+m"(*(FLOAT (*)[n]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[4]) x),[x] "a"(x),[alpha] "Q"(*alpha), [n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", @@ -274,11 +274,11 @@ static void dgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "agfi %%r1,32\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" - : "+m"(*(struct { FLOAT x[n]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[2]; } *) x),[x] "a"(x),[alpha] "Q"(*alpha), + "nop 0" + : "+m"(*(FLOAT (*)[n]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[2]) x),[x] "a"(x),[alpha] "Q"(*alpha), [n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -351,9 +351,9 @@ static void dgemv_kernel_4x1(BLASLONG n, FLOAT *a0, FLOAT *x, FLOAT *y, "agfi %%r1,32\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" - : "+m"(*(struct { FLOAT x[n]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) a0),[a0] "a"(a0), + "nop 0" + : "+m"(*(FLOAT (*)[n]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) a0),[a0] "a"(a0), "m"(*(const FLOAT (*)[1]) x),[x] "a"(x),[alpha] "Q"(*alpha), [n] "r"(n) : "cc", "r0", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", diff --git a/kernel/zarch/dgemv_t_4.c b/kernel/zarch/dgemv_t_4.c index de72a1798..513cffe5a 100644 --- a/kernel/zarch/dgemv_t_4.c +++ b/kernel/zarch/dgemv_t_4.c @@ -173,12 +173,12 @@ static void dgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vrepg %%v4,%%v3,1\n\t" "adbr %%f3,%%f4\n\t" "std %%f3,24(%[y])" - : "=m"(*(struct { FLOAT x[4]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x),[n] "r"(n) + : "=m"(*(FLOAT (*)[4]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[n]) x),[x] "a"(x),[n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -280,10 +280,10 @@ static void dgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vrepg %%v2,%%v1,1\n\t" "adbr %%f1,%%f2\n\t" "std %%f1,8(%[y])" - : "=m"(*(struct { FLOAT x[2]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - 
"m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x),[n] "r"(n) + : "=m"(*(FLOAT (*)[2]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n]) x),[x] "a"(x),[n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -360,8 +360,8 @@ static void dgemv_kernel_4x1(BLASLONG n, FLOAT *a0, FLOAT *x, FLOAT *y) { "adbr %%f0,%%f1\n\t" "std %%f0,0(%[y])" : "=m"(*(FLOAT (*)[1]) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) a0),[a0] "a"(a0), - "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x),[n] "r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) a0),[a0] "a"(a0), + "m"(*(const FLOAT (*)[n]) x),[x] "a"(x),[n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -438,9 +438,9 @@ static void add_y_kernel_4(BLASLONG n, FLOAT da, FLOAT *src, FLOAT *dest) { "agfi %%r1,32\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" - : "+m"(*(struct { FLOAT x[n]; } *) dest) - : [dest] "a"(dest),[da] "Q"(da), "m"(*(const struct { FLOAT x[n]; } *) src), + "nop 0" + : "+m"(*(FLOAT (*)[n]) dest) + : [dest] "a"(dest),[da] "Q"(da), "m"(*(const FLOAT (*)[n]) src), [src] "a"(src),[n] "r"(n) : "cc", "r0", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", diff --git a/kernel/zarch/dmax.c b/kernel/zarch/dmax.c index cdc8d5d08..4b76e0dd6 100644 --- a/kernel/zarch/dmax.c +++ b/kernel/zarch/dmax.c @@ -73,7 +73,7 @@ static FLOAT dmax_kernel_32(BLASLONG n, FLOAT *x) { "wfmaxdb %%v0,%%v0,%%v16,0\n\t" "ldr %[max],%%f0" : [max] "=f"(max),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT 
(*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dmax_z13.c b/kernel/zarch/dmax_z13.c index c4e8d91f8..93acee2db 100644 --- a/kernel/zarch/dmax_z13.c +++ b/kernel/zarch/dmax_z13.c @@ -90,7 +90,7 @@ static FLOAT dmax_kernel_32(BLASLONG n, FLOAT *x) { "vsel %%v0,%%v0,%%v16,%%v17\n\t" "ldr %[max],%%f0" : [max] "=f"(max),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dmin.c b/kernel/zarch/dmin.c index f9b129cbd..21d55f323 100644 --- a/kernel/zarch/dmin.c +++ b/kernel/zarch/dmin.c @@ -73,7 +73,7 @@ static FLOAT dmin_kernel_32(BLASLONG n, FLOAT *x) { "wfmindb %%v0,%%v0,%%v16,0\n\t" "ldr %[min],%%f0" : [min] "=f"(min),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dmin_z13.c b/kernel/zarch/dmin_z13.c index 77f021c1d..7d2dae3fb 100644 --- a/kernel/zarch/dmin_z13.c +++ b/kernel/zarch/dmin_z13.c @@ -90,7 +90,7 @@ static FLOAT dmin_kernel_32(BLASLONG n, FLOAT *x) { "vsel %%v0,%%v0,%%v16,%%v17\n\t" "ldr %[min],%%f0" : [min] "=f"(min),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/drot.c b/kernel/zarch/drot.c index 11fbe15b6..9d6d1a80d 100644 --- a/kernel/zarch/drot.c +++ b/kernel/zarch/drot.c @@ -169,7 +169,7 @@ static void drot_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *c, FLOAT 
*s) { "vst %%v23, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) x), "+m"(*(struct { FLOAT x[n]; } *) y), + : "+m"(*(FLOAT (*)[n]) x), "+m"(*(FLOAT (*)[n]) y), [n] "+&r"(n) : [x] "a"(x),[y] "a"(y),[c] "Q"(*c),[s] "Q"(*s) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", diff --git a/kernel/zarch/dscal.c b/kernel/zarch/dscal.c index 2961eff20..a5a5e3468 100644 --- a/kernel/zarch/dscal.c +++ b/kernel/zarch/dscal.c @@ -59,7 +59,7 @@ static void dscal_kernel_16(BLASLONG n, FLOAT da, FLOAT *x) { "vst %%v31,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) x),[n] "+&r"(n) + : "+m"(*(FLOAT (*)[n]) x),[n] "+&r"(n) : [x] "a"(x),[da] "Q"(da) : "cc", "r1", "v0", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -81,7 +81,7 @@ static void dscal_kernel_16_zero(BLASLONG n, FLOAT *x) { "vst %%v0,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n]; } *) x),[n] "+&r"(n) + : "=m"(*(FLOAT (*)[n]) x),[n] "+&r"(n) : [x] "a"(x) : "cc", "r1", "v0"); } diff --git a/kernel/zarch/dsdot.c b/kernel/zarch/dsdot.c index 5fa88c3b9..2952bcf42 100644 --- a/kernel/zarch/dsdot.c +++ b/kernel/zarch/dsdot.c @@ -112,8 +112,8 @@ static double dsdot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y) { "adbr %%f0,%%f1\n\t" "ldr %[dot],%%f0" : [dot] "=f"(dot),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[n]; } *) y),[y] "a"(y) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[n]) y),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dsum.c b/kernel/zarch/dsum.c index 8d44873c0..69b9f9b41 100644 --- a/kernel/zarch/dsum.c +++ b/kernel/zarch/dsum.c @@ -88,7 +88,7 @@ static FLOAT dsum_kernel_32(BLASLONG n, FLOAT *x) { "vfadb 
%%v24,%%v24,%%v25\n\t" "vsteg %%v24,%[sum],0" : [sum] "=Q"(sum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dswap.c b/kernel/zarch/dswap.c index f0c9ded51..46cbbba23 100644 --- a/kernel/zarch/dswap.c +++ b/kernel/zarch/dswap.c @@ -99,7 +99,7 @@ static void dswap_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y) { "vst %%v31, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) x), "+m"(*(struct { FLOAT x[n]; } *) y), + : "+m"(*(FLOAT (*)[n]) x), "+m"(*(FLOAT (*)[n]) y), [n] "+&r"(n) : [x] "a"(x),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", diff --git a/kernel/zarch/gemm8x4V.S b/kernel/zarch/gemm8x4V.S index 27fd5f57b..633e60ea6 100644 --- a/kernel/zarch/gemm8x4V.S +++ b/kernel/zarch/gemm8x4V.S @@ -147,7 +147,7 @@ brctg LOCAL_VAR1,.L8x4_4_BK ALIGN_4 .L8x4_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L8x4_BK_Store @@ -183,7 +183,7 @@ brctg LOCAL_VAR1,.L4x4_4_BK ALIGN_4 .L4x4_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L4x4_BK_Store @@ -217,7 +217,7 @@ brctg LOCAL_VAR1,.L2x4_4_BK ALIGN_4 .L2x4_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L2x4_BK_Store @@ -252,7 +252,7 @@ brctg LOCAL_VAR1,.L1x4_4_BK ALIGN_4 .L1x4_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L1x4_BK_Store @@ -309,7 +309,7 @@ brctg LOCAL_VAR1,.L8x2_4_BK ALIGN_4 .L8x2_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L8x2_BK_Store @@ -346,7 +346,7 @@ brctg LOCAL_VAR1,.L4x2_4_BK ALIGN_4 .L4x2_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L4x2_BK_Store @@ -380,7 +380,7 @@ brctg LOCAL_VAR1,.L2x2_4_BK 
ALIGN_4 .L2x2_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L2x2_BK_Store @@ -415,7 +415,7 @@ brctg LOCAL_VAR1,.L1x2_4_BK ALIGN_4 .L1x2_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L1x2_BK_Store @@ -471,7 +471,7 @@ brctg LOCAL_VAR1,.L8x1_4_BK ALIGN_4 .L8x1_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L8x1_BK_Store @@ -508,7 +508,7 @@ brctg LOCAL_VAR1,.L4x1_4_BK ALIGN_4 .L4x1_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L4x1_BK_Store @@ -542,7 +542,7 @@ brctg LOCAL_VAR1,.L2x1_4_BK ALIGN_4 .L2x1_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L2x1_BK_Store @@ -577,7 +577,7 @@ brctg LOCAL_VAR1,.L1x1_4_BK ALIGN_4 .L1x1_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L1x1_BK_Store diff --git a/kernel/zarch/gemm_vec.c b/kernel/zarch/gemm_vec.c index 741c09431..ef0b1d1e3 100644 --- a/kernel/zarch/gemm_vec.c +++ b/kernel/zarch/gemm_vec.c @@ -172,7 +172,7 @@ static inline vector_float vec_load_hinted(FLOAT const *restrict a) { vector_float const *restrict addr = (vector_float const *restrict)a; vector_float y; -#if __GNUC__ < 9 +#if __GNUC__ < 9 && !defined(__clang__) // hex-encode vl %[out],%[addr],3 asm(".insn vrx,0xe70000003006,%[out],%[addr],3" : [ out ] "=v"(y) @@ -393,7 +393,7 @@ static inline void GEBP_block_16_4( * Note that we need to massage this particular "barrier" * depending on the gcc version. 
*/ -#if __GNUC__ > 7 +#if __GNUC__ > 7 || defined(__clang__) #define BARRIER_READ_BEFORE_COMPUTE(SUFFIX) \ do { \ asm("" \ diff --git a/kernel/zarch/icamax.c b/kernel/zarch/icamax.c index a2546b812..459196d00 100644 --- a/kernel/zarch/icamax.c +++ b/kernel/zarch/icamax.c @@ -213,9 +213,9 @@ static BLASLONG icamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax) { "ste %%f0,%[amax]\n\t" "vlgvg %[iamax],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamax] "=r"(iamax),[amax] "=Q"(*amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/icamin.c b/kernel/zarch/icamin.c index 09654b742..9bcf3646b 100644 --- a/kernel/zarch/icamin.c +++ b/kernel/zarch/icamin.c @@ -213,9 +213,9 @@ static BLASLONG icamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin) { "ste %%f0,%[amin]\n\t" "vlgvg %[iamin],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamin] "=r"(iamin),[amin] "=Q"(*amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/idamax.c b/kernel/zarch/idamax.c index b292c1d15..0f53488d3 100644 --- a/kernel/zarch/idamax.c +++ b/kernel/zarch/idamax.c @@ -160,9 +160,9 @@ static BLASLONG idamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax) { "std %%f0,%[amax]\n\t" "vlgvg %[iamax],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamax] "=r"(iamax),[amax] "=Q"(*amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", 
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/idamin.c b/kernel/zarch/idamin.c index f9a8119e1..f48bde894 100644 --- a/kernel/zarch/idamin.c +++ b/kernel/zarch/idamin.c @@ -160,9 +160,9 @@ static BLASLONG idamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin) { "std %%f0,%[amin]\n\t" "vlgvg %[iamin],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamin] "=r"(iamin),[amin] "=Q"(*amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/idmax.c b/kernel/zarch/idmax.c index 8f283bc17..1fdf1fa02 100644 --- a/kernel/zarch/idmax.c +++ b/kernel/zarch/idmax.c @@ -140,9 +140,9 @@ static BLASLONG idmax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *max) { "std %%f0,%[max]\n\t" "vlgvg %[imax],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [imax] "=r"(imax),[max] "=Q"(*max),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/idmin.c b/kernel/zarch/idmin.c index e4b7bb4fe..282f26bbd 100644 --- a/kernel/zarch/idmin.c +++ b/kernel/zarch/idmin.c @@ -140,9 +140,9 @@ static BLASLONG idmin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *min) { "std %%f0,%[min]\n\t" "vlgvg %[imin],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [imin] "=r"(imin),[min] "=Q"(*min),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", 
"v29", "v30", "v31"); diff --git a/kernel/zarch/isamax.c b/kernel/zarch/isamax.c index ac86435d7..a30a96412 100644 --- a/kernel/zarch/isamax.c +++ b/kernel/zarch/isamax.c @@ -204,9 +204,9 @@ static BLASLONG isamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amax) { "ste %%f0,%[amax]\n\t" "vlgvg %[iamax],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamax] "=r"(iamax),[amax] "=Q"(*amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT(*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v4", "v5", "v6", "v7", "v8", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/isamin.c b/kernel/zarch/isamin.c index 3f2d039eb..b29027ff4 100644 --- a/kernel/zarch/isamin.c +++ b/kernel/zarch/isamin.c @@ -204,9 +204,9 @@ static BLASLONG isamin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amin) { "ste %%f0,%[amin]\n\t" "vlgvg %[iamin],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamin] "=r"(iamin),[amin] "=Q"(*amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v4", "v5", "v6", "v7", "v8", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/ismax.c b/kernel/zarch/ismax.c index 41172c1bd..3d751ff6b 100644 --- a/kernel/zarch/ismax.c +++ b/kernel/zarch/ismax.c @@ -184,9 +184,9 @@ static BLASLONG ismax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *max) { "ste %%f0,%[max]\n\t" "vlgvg %[imax],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [imax] "=r"(imax),[max] "=Q"(*max),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v4", "v5", "v6", "v7", "v8", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/ismin.c 
b/kernel/zarch/ismin.c index e2684df41..e57c0bfa6 100644 --- a/kernel/zarch/ismin.c +++ b/kernel/zarch/ismin.c @@ -184,9 +184,9 @@ static BLASLONG ismin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *min) { "ste %%f0,%[min]\n\t" "vlgvg %[imin],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [imin] "=r"(imin),[min] "=Q"(*min),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v4", "v5", "v6", "v7", "v8", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/izamax.c b/kernel/zarch/izamax.c index daca1d6f7..fda76f471 100644 --- a/kernel/zarch/izamax.c +++ b/kernel/zarch/izamax.c @@ -157,9 +157,9 @@ static BLASLONG izamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *amax) { "std %%f0,%[amax]\n\t" "vlgvg %[iamax],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamax] "=r"(iamax),[amax] "=Q"(*amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27"); diff --git a/kernel/zarch/izamin.c b/kernel/zarch/izamin.c index 9ababb91f..412ab15ca 100644 --- a/kernel/zarch/izamin.c +++ b/kernel/zarch/izamin.c @@ -157,9 +157,9 @@ static BLASLONG izamin_kernel_16(BLASLONG n, FLOAT *x, FLOAT *amin) { "std %%f0,%[amin]\n\t" "vlgvg %[iamin],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamin] "=r"(iamin),[amin] "=Q"(*amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27"); diff --git a/kernel/zarch/samax.c b/kernel/zarch/samax.c index fdda6dd32..20da4406a 100644 --- a/kernel/zarch/samax.c +++ b/kernel/zarch/samax.c @@ -78,7 +78,7 @@ static 
FLOAT samax_kernel_64(BLASLONG n, FLOAT *x) { "wfmaxsb %%v0,%%v0,%%v16,8\n\t" "lper %[amax],%%f0" : [amax] "=f"(amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/samin.c b/kernel/zarch/samin.c index f05e851f9..e7e4fd9b7 100644 --- a/kernel/zarch/samin.c +++ b/kernel/zarch/samin.c @@ -78,7 +78,7 @@ static FLOAT samin_kernel_64(BLASLONG n, FLOAT *x) { "wfminsb %%v0,%%v0,%%v16,8\n\t" "lper %[amin],%%f0" : [amin] "=f"(amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/sasum.c b/kernel/zarch/sasum.c index d56f2697b..4cf74f351 100644 --- a/kernel/zarch/sasum.c +++ b/kernel/zarch/sasum.c @@ -108,7 +108,7 @@ static FLOAT sasum_kernel_64(BLASLONG n, FLOAT *x) { "vfasb %%v24,%%v24,%%v25\n\t" "vstef %%v24,%[asum],0" : [asum] "=Q"(asum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/saxpy.c b/kernel/zarch/saxpy.c index ca34a47ff..8bcb1a61b 100644 --- a/kernel/zarch/saxpy.c +++ b/kernel/zarch/saxpy.c @@ -100,8 +100,8 @@ static void saxpy_kernel_64(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) { "vst %%v27,240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x), + : "+m"(*(FLOAT (*)[n]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) x),[x] "a"(x), [alpha] "Q"(*alpha) : "cc", "r1", "v0", 
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/scopy.c b/kernel/zarch/scopy.c index 5c453cfbb..631c9f929 100644 --- a/kernel/zarch/scopy.c +++ b/kernel/zarch/scopy.c @@ -36,8 +36,8 @@ static void scopy_kernel_64(BLASLONG n, FLOAT *x, FLOAT *y) { "la %[x],256(%[x])\n\t" "la %[y],256(%[y])\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n]; } *) y),[x] "+&a"(x),[y] "+&a"(y),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x) + : "=m"(*(FLOAT (*)[n]) y),[x] "+&a"(x),[y] "+&a"(y),[n] "+&r"(n) + : "m"(*(const FLOAT (*)[n]) x) : "cc"); } diff --git a/kernel/zarch/sdot.c b/kernel/zarch/sdot.c index d870b30f0..d27c17162 100644 --- a/kernel/zarch/sdot.c +++ b/kernel/zarch/sdot.c @@ -84,8 +84,8 @@ static FLOAT sdot_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y) { "aebr %%f0,%%f3\n\t" "ler %[dot],%%f0" : [dot] "=f"(dot),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[n]; } *) y),[y] "a"(y) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[n]) y),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/sgemv_n_4.c b/kernel/zarch/sgemv_n_4.c index a1efef373..b4cfb61de 100644 --- a/kernel/zarch/sgemv_n_4.c +++ b/kernel/zarch/sgemv_n_4.c @@ -159,13 +159,13 @@ static void sgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "agfi %%r1,16\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" - : "+m"(*(struct { FLOAT x[n]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[4]; } *) x),[x] "a"(x),[alpha] "Q"(*alpha), + "nop 0" + : 
"+m"(*(FLOAT (*)[n]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[4]) x),[x] "a"(x),[alpha] "Q"(*alpha), [n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", @@ -258,11 +258,11 @@ static void sgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "agfi %%r1,16\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" - : "+m"(*(struct { FLOAT x[n]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[2]; } *) x),[x] "a"(x),[alpha] "Q"(*alpha), + "nop 0" + : "+m"(*(FLOAT (*)[n]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[2]) x),[x] "a"(x),[alpha] "Q"(*alpha), [n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -331,9 +331,9 @@ static void sgemv_kernel_4x1(BLASLONG n, FLOAT *a0, FLOAT *x, FLOAT *y, "agfi %%r1,16\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" - : "+m"(*(struct { FLOAT x[n]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) a0),[a0] "a"(a0), + "nop 0" + : "+m"(*(FLOAT (*)[n]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) a0),[a0] "a"(a0), "m"(*(const FLOAT (*)[1]) x),[x] "a"(x),[alpha] "Q"(*alpha), [n] "r"(n) : "cc", "r0", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", diff --git a/kernel/zarch/sgemv_t_4.c b/kernel/zarch/sgemv_t_4.c index 81d7c9fe7..3c708200c 100644 --- a/kernel/zarch/sgemv_t_4.c +++ b/kernel/zarch/sgemv_t_4.c @@ -172,12 +172,12 @@ static void sgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vrepg %%v4,%%v3,1\n\t" "aebr 
%%f3,%%f4\n\t" "ste %%f3,12(%[y])" - : "=m"(*(struct { FLOAT x[4]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x),[n] "r"(n) + : "=m"(*(FLOAT (*)[4]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[n]) x),[x] "a"(x),[n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -278,10 +278,10 @@ static void sgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vrepg %%v2,%%v1,1\n\t" "aebr %%f1,%%f2\n\t" "ste %%f1,4(%[y])" - : "=m"(*(struct { FLOAT x[2]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x),[n] "r"(n) + : "=m"(*(FLOAT (*)[2]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n]) x),[x] "a"(x),[n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -357,8 +357,8 @@ static void sgemv_kernel_4x1(BLASLONG n, FLOAT *a0, FLOAT *x, FLOAT *y) { "aebr %%f0,%%f1\n\t" "ste %%f0,0(%[y])" : "=m"(*(FLOAT (*)[1]) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) a0),[a0] "a"(a0), - "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x),[n] "r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) a0),[a0] 
"a"(a0), + "m"(*(const FLOAT (*)[n]) x),[x] "a"(x),[n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -431,9 +431,9 @@ static void add_y_kernel_4(BLASLONG n, FLOAT da, FLOAT *src, FLOAT *dest) { "agfi %%r1,16\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" - : "+m"(*(struct { FLOAT x[n]; } *) dest) - : [dest] "a"(dest),[da] "Q"(da), "m"(*(const struct { FLOAT x[n]; } *) src), + "nop 0" + : "+m"(*(FLOAT (*)[n]) dest) + : [dest] "a"(dest),[da] "Q"(da), "m"(*(const FLOAT (*)[n]) src), [src] "a"(src),[n] "r"(n) : "cc", "r0", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", diff --git a/kernel/zarch/smax.c b/kernel/zarch/smax.c index 7015aaa1d..0c7433cbc 100644 --- a/kernel/zarch/smax.c +++ b/kernel/zarch/smax.c @@ -75,7 +75,7 @@ static FLOAT smax_kernel_64(BLASLONG n, FLOAT *x) { "wfmaxsb %%v0,%%v0,%%v16,0\n\t" "ler %[max],%%f0" : [max] "=f"(max),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT(*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/smin.c b/kernel/zarch/smin.c index b6875c5c6..5e0f3860d 100644 --- a/kernel/zarch/smin.c +++ b/kernel/zarch/smin.c @@ -75,7 +75,7 @@ static FLOAT smin_kernel_64(BLASLONG n, FLOAT *x) { "wfminsb %%v0,%%v0,%%v16,0\n\t" "ler %[min],%%f0" : [min] "=f"(min),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/srot.c b/kernel/zarch/srot.c index 4f471d866..c235adcbe 100644 --- a/kernel/zarch/srot.c +++ b/kernel/zarch/srot.c @@ -169,7 +169,7 @@ static 
void srot_kernel_64(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *c, FLOAT *s) { "vst %%v23, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) x), "+m"(*(struct { FLOAT x[n]; } *) y), + : "+m"(*(FLOAT (*)[n]) x), "+m"(*(FLOAT (*)[n]) y), [n] "+&r"(n) : [x] "a"(x),[y] "a"(y),[c] "Q"(*c),[s] "Q"(*s) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", diff --git a/kernel/zarch/sscal.c b/kernel/zarch/sscal.c index 9b9930dc8..da2f49eaf 100644 --- a/kernel/zarch/sscal.c +++ b/kernel/zarch/sscal.c @@ -59,7 +59,7 @@ static void sscal_kernel_32(BLASLONG n, FLOAT da, FLOAT *x) { "vst %%v31,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) x),[n] "+&r"(n) + : "+m"(*(FLOAT (*)[n]) x),[n] "+&r"(n) : [x] "a"(x),[da] "Q"(da) : "cc", "r1", "v0", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -81,7 +81,7 @@ static void sscal_kernel_32_zero(BLASLONG n, FLOAT *x) { "vst %%v0,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n]; } *) x),[n] "+&r"(n) + : "=m"(*(FLOAT (*)[n]) x),[n] "+&r"(n) : [x] "a"(x) : "cc", "r1", "v0"); } diff --git a/kernel/zarch/ssum.c b/kernel/zarch/ssum.c index 3f3f46a85..02aabdff6 100644 --- a/kernel/zarch/ssum.c +++ b/kernel/zarch/ssum.c @@ -91,7 +91,7 @@ static FLOAT ssum_kernel_64(BLASLONG n, FLOAT *x) { "vfasb %%v24,%%v24,%%v25\n\t" "vstef %%v24,%[sum],0" : [sum] "=Q"(sum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/sswap.c b/kernel/zarch/sswap.c index 0c62f189d..ec860765a 100644 --- a/kernel/zarch/sswap.c +++ b/kernel/zarch/sswap.c @@ -99,7 +99,7 @@ static void sswap_kernel_64(BLASLONG n, FLOAT *x, FLOAT *y) { "vst %%v31, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { 
FLOAT x[n]; } *) x), "+m"(*(struct { FLOAT x[n]; } *) y), + : "+m"(*(FLOAT (*)[n]) x), "+m"(*(FLOAT (*)[n]) y), [n] "+&r"(n) : [x] "a"(x),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", diff --git a/kernel/zarch/strmm8x4V.S b/kernel/zarch/strmm8x4V.S index f8e748167..e34a7a05a 100644 --- a/kernel/zarch/strmm8x4V.S +++ b/kernel/zarch/strmm8x4V.S @@ -186,7 +186,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,8,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L8x4_BK_Store @@ -239,7 +239,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,4 nill LOCAL_VAR1,3 #else - la LOCAL_VAR1,3(0,0) + lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x4_BK_Store @@ -290,7 +290,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x4_BK_Store @@ -341,7 +341,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x4_BK_Store @@ -423,7 +423,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,8,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L8x2_BK_Store @@ -475,7 +475,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x2_BK_Store @@ -525,7 +525,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x2_BK_Store @@ -575,7 +575,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x2_BK_Store @@ -655,7 +655,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,8,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi 
LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L8x1_BK_Store @@ -708,7 +708,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x1_BK_Store @@ -757,7 +757,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x1_BK_Store @@ -807,7 +807,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x1_BK_Store diff --git a/kernel/zarch/zamax.c b/kernel/zarch/zamax.c index aa04ab91f..98e40d073 100644 --- a/kernel/zarch/zamax.c +++ b/kernel/zarch/zamax.c @@ -114,7 +114,7 @@ static FLOAT zamax_kernel_16(BLASLONG n, FLOAT *x) { "wfmaxdb %%v0,%%v0,%%v16,0\n\t" "ldr %[amax],%%f0" : [amax] "=f"(amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/zamax_z13.c b/kernel/zarch/zamax_z13.c index 37278d6db..f727ad67a 100644 --- a/kernel/zarch/zamax_z13.c +++ b/kernel/zarch/zamax_z13.c @@ -123,7 +123,7 @@ static FLOAT zamax_kernel_16(BLASLONG n, FLOAT *x) { "vsel %%v0,%%v0,%%v16,%%v17\n\t" "ldr %[amax],%%f0" : [amax] "=f"(amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27"); diff --git a/kernel/zarch/zamin.c b/kernel/zarch/zamin.c index 0b5402853..2e43fefd9 100644 --- a/kernel/zarch/zamin.c +++ b/kernel/zarch/zamin.c @@ -114,7 +114,7 @@ static FLOAT zamin_kernel_16(BLASLONG n, FLOAT *x) { "wfmindb %%v0,%%v0,%%v16,0\n\t" "ldr %[amin],%%f0" : [amin] "=f"(amin),[n] 
"+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/zamin_z13.c b/kernel/zarch/zamin_z13.c index e37bb2236..e52802595 100644 --- a/kernel/zarch/zamin_z13.c +++ b/kernel/zarch/zamin_z13.c @@ -123,7 +123,7 @@ static FLOAT zamin_kernel_16(BLASLONG n, FLOAT *x) { "vsel %%v0,%%v0,%%v16,%%v17\n\t" "ldr %[amin],%%f0" : [amin] "=f"(amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27"); diff --git a/kernel/zarch/zasum.c b/kernel/zarch/zasum.c index aeef8d77e..0003f38a5 100644 --- a/kernel/zarch/zasum.c +++ b/kernel/zarch/zasum.c @@ -106,7 +106,7 @@ static FLOAT zasum_kernel_16(BLASLONG n, FLOAT *x) { "vfadb %%v24,%%v24,%%v25\n\t" "vsteg %%v24,%[asum],0" : [asum] "=Q"(asum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/zaxpy.c b/kernel/zarch/zaxpy.c index 9363ec32d..f2c115597 100644 --- a/kernel/zarch/zaxpy.c +++ b/kernel/zarch/zaxpy.c @@ -95,9 +95,9 @@ static void zaxpy_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) { "vst %%v19,112(%%r1,%[y])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", 
"v0", "v1", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/zcopy.c b/kernel/zarch/zcopy.c index 5a46aec1c..d91d9f367 100644 --- a/kernel/zarch/zcopy.c +++ b/kernel/zarch/zcopy.c @@ -36,9 +36,9 @@ static void zcopy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y) { "la %[x],256(%[x])\n\t" "la %[y],256(%[y])\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n * 2]; } *) y),[x] "+&a"(x),[y] "+&a"(y), + : "=m"(*(FLOAT (*)[n * 2]) y),[x] "+&a"(x),[y] "+&a"(y), [n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x) + : "m"(*(const FLOAT (*)[n * 2]) x) : "cc"); } diff --git a/kernel/zarch/zdot.c b/kernel/zarch/zdot.c index ac6e69c23..6b7144101 100644 --- a/kernel/zarch/zdot.c +++ b/kernel/zarch/zdot.c @@ -93,9 +93,9 @@ static void zdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d) { "vsteg %%v24,8(%[d]),1\n\t" "vsteg %%v25,16(%[d]),1\n\t" "vsteg %%v25,24(%[d]),0" - : "=m"(*(struct { FLOAT x[4]; } *) d),[n] "+&r"(n) - : [d] "a"(d), "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[n * 2]; } *) y),[y] "a"(y) + : "=m"(*(FLOAT (*)[4]) d),[n] "+&r"(n) + : [d] "a"(d), "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[n * 2]) y),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/zgemv_n_4.c b/kernel/zarch/zgemv_n_4.c index 13045a359..2ef9b4de8 100644 --- a/kernel/zarch/zgemv_n_4.c +++ b/kernel/zarch/zgemv_n_4.c @@ -112,12 +112,12 @@ static void zgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vst %%v1,16(%%r1,%[y])\n\t" "agfi %%r1,32\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; 
} *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[8]; } *) x),[x] "a"(x) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n * 2]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n * 2]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[8]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -172,10 +172,10 @@ static void zgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vst %%v1,16(%%r1,%[y])\n\t" "agfi %%r1,32\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[4]; } *) x),[x] "a"(x) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[4]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27"); } @@ -210,9 +210,9 @@ static void zgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y) { "vst %%v1,16(%%r1,%[y])\n\t" "agfi %%r1,32\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap),[ap] "a"(ap), - "m"(*(const struct { FLOAT x[2]; } *) x),[x] "a"(x) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap),[ap] "a"(ap), + "m"(*(const FLOAT (*)[2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21"); } @@ -261,8 +261,8 @@ static void 
add_y_4(BLASLONG n, FLOAT *src, FLOAT *dest, FLOAT alpha_r, "vst %%v31,48(%%r1,%[dest])\n\t" "agfi %%r1,64\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) dest),[n] "+&r"(n) - : [dest] "a"(dest), "m"(*(const struct { FLOAT x[n * 2]; } *) src), + : "+m"(*(FLOAT (*)[n * 2]) dest),[n] "+&r"(n) + : [dest] "a"(dest), "m"(*(const FLOAT (*)[n * 2]) src), [src] "a"(src),[alpha_r] "Q"(alpha_r),[alpha_i] "Q"(alpha_i) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", diff --git a/kernel/zarch/zgemv_t_4.c b/kernel/zarch/zgemv_t_4.c index 031c31e29..c10769266 100644 --- a/kernel/zarch/zgemv_t_4.c +++ b/kernel/zarch/zgemv_t_4.c @@ -141,13 +141,13 @@ static void zgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "vst %%v27,16(%[y])\n\t" "vst %%v28,32(%[y])\n\t" "vst %%v29,48(%[y])" - : "+m"(*(struct { FLOAT x[8]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[8]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n * 2]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n * 2]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -229,11 +229,11 @@ static void zgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "vfmadb %%v23,%%v19,%%v21,%%v23\n\t" "vst %%v22,0(%[y])\n\t" 
"vst %%v23,16(%[y])\n\t" - : "+m"(*(struct { FLOAT x[4]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[4]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); } @@ -294,10 +294,10 @@ static void zgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, "vfmadb %%v0,%%v16,%%v18,%%v0\n\t" "vfmadb %%v0,%%v17,%%v19,%%v0\n\t" "vst %%v0,0(%[y])\n\t" - : "+m"(*(struct { FLOAT x[2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap),[ap] "a"(ap), - "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap),[ap] "a"(ap), + "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19"); } diff --git a/kernel/zarch/zrot.c b/kernel/zarch/zrot.c index 6284d5a47..3b87e356a 100644 --- a/kernel/zarch/zrot.c +++ b/kernel/zarch/zrot.c @@ -169,8 +169,8 @@ static void zrot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *c, FLOAT *s) { "vst %%v23, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x), - "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) + : "+m"(*(FLOAT (*)[n * 2]) x), + "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) : [x] "a"(x),[y] "a"(y),[c] "Q"(*c),[s] "Q"(*s) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", 
"v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", diff --git a/kernel/zarch/zscal.c b/kernel/zarch/zscal.c index e497a6d7b..a5a8f694d 100644 --- a/kernel/zarch/zscal.c +++ b/kernel/zarch/zscal.c @@ -78,8 +78,8 @@ static void zscal_kernel_8(BLASLONG n, FLOAT *alpha, FLOAT *x) { "vst %%v23,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) - : [x] "a"(x), "m"(*(const struct { FLOAT x[2]; } *) alpha), + : "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) + : [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha), [alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", @@ -128,8 +128,8 @@ static void zscal_kernel_8_zero_r(BLASLONG n, FLOAT *alpha, FLOAT *x) { "vst %%v23,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) - : [x] "a"(x), "m"(*(const struct { FLOAT x[2]; } *) alpha), + : "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) + : [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha), [alpha] "a"(alpha) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); @@ -167,8 +167,8 @@ static void zscal_kernel_8_zero_i(BLASLONG n, FLOAT *alpha, FLOAT *x) { "vst %%v23,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) - : [x] "a"(x), "m"(*(const struct { FLOAT x[2]; } *) alpha), + : "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) + : [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha), [alpha] "a"(alpha) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); @@ -190,7 +190,7 @@ static void zscal_kernel_8_zero(BLASLONG n, FLOAT *x) { "vst %%v0,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) + : "=m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) : [x] "a"(x) : "cc", "r1", "v0"); } diff --git a/kernel/zarch/zsum.c b/kernel/zarch/zsum.c 
index e0f978d87..b35832af8 100644 --- a/kernel/zarch/zsum.c +++ b/kernel/zarch/zsum.c @@ -89,7 +89,7 @@ static FLOAT zsum_kernel_16(BLASLONG n, FLOAT *x) { "vfadb %%v24,%%v24,%%v25\n\t" "vsteg %%v24,%[sum],0" : [sum] "=Q"(sum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/zswap.c b/kernel/zarch/zswap.c index bc466866c..7a2d1f882 100644 --- a/kernel/zarch/zswap.c +++ b/kernel/zarch/zswap.c @@ -99,8 +99,8 @@ static void zswap_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y) { "vst %%v31, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x), - "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) + : "+m"(*(FLOAT (*)[n * 2]) x), + "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) : [x] "a"(x),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", diff --git a/kernel/zarch/ztrmm4x4V.S b/kernel/zarch/ztrmm4x4V.S index 52ee15f06..6fd7f2509 100644 --- a/kernel/zarch/ztrmm4x4V.S +++ b/kernel/zarch/ztrmm4x4V.S @@ -196,7 +196,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,4 nill LOCAL_VAR1,3 #else - la LOCAL_VAR1,3(0,0) + lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x4_BK_Store @@ -256,7 +256,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x4_BK_Store @@ -307,7 +307,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x4_BK_Store @@ -390,7 +390,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif 
jz .L4x2_BK_Store @@ -447,7 +447,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x2_BK_Store @@ -497,7 +497,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x2_BK_Store @@ -573,7 +573,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x1_BK_Store @@ -625,7 +625,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x1_BK_Store @@ -675,7 +675,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x1_BK_Store diff --git a/relapack/src/cgbtrf.c b/relapack/src/cgbtrf.c index 61332c6a6..e52f2e6c1 100644 --- a/relapack/src/cgbtrf.c +++ b/relapack/src/cgbtrf.c @@ -36,6 +36,7 @@ void RELAPACK_cgbtrf( return; } + if (*m == 0 || *n == 0) return; // Constant const float ZERO[] = { 0., 0. }; @@ -56,10 +57,10 @@ void RELAPACK_cgbtrf( // Allocate work space const blasint n1 = CREC_SPLIT(*n); - const blasint mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv; - const blasint nWorkl = (kv > n1) ? n1 : kv; - const blasint mWorku = (*kl > n1) ? n1 : *kl; - const blasint nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl; + const blasint mWorkl = abs ( (kv > n1) ? MAX(1, *m - *kl) : kv); + const blasint nWorkl = abs ( (kv > n1) ? n1 : kv); + const blasint mWorku = abs ((*kl > n1) ? n1 : *kl); + const blasint nWorku = abs ((*kl > n1) ? 
MAX(0, *n - *kl) : *kl); float *Workl = malloc(mWorkl * nWorkl * 2 * sizeof(float)); float *Worku = malloc(mWorku * nWorku * 2 * sizeof(float)); LAPACK(claset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl); @@ -82,7 +83,7 @@ static void RELAPACK_cgbtrf_rec( blasint *info ) { - if (*n <= MAX(CROSSOVER_CGBTRF, 1)) { + if (*n <= MAX(CROSSOVER_CGBTRF, 1)|| *n > *kl || *ldAb == 1) { // Unblocked LAPACK(cgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); return; diff --git a/relapack/src/cgetrf.c b/relapack/src/cgetrf.c index 878c9ec15..bf9ca53f4 100644 --- a/relapack/src/cgetrf.c +++ b/relapack/src/cgetrf.c @@ -30,6 +30,8 @@ void RELAPACK_cgetrf( return; } + if (*m == 0 || *n == 0) return; + const blasint sn = MIN(*m, *n); RELAPACK_cgetrf_rec(m, &sn, A, ldA, ipiv, info); @@ -62,9 +64,11 @@ static void RELAPACK_cgetrf_rec( blasint *info ) { - if (*n <= MAX(CROSSOVER_CGETRF, 1)) { + if (*m == 0 || *n == 0) return; + + if ( *n <= MAX(CROSSOVER_CGETRF, 1)) { // Unblocked - LAPACK(cgetf2)(m, n, A, ldA, ipiv, info); + LAPACK(cgetrf2)(m, n, A, ldA, ipiv, info); return; } @@ -96,6 +100,7 @@ static void RELAPACK_cgetrf_rec( // recursion(A_L, ipiv_T) RELAPACK_cgetrf_rec(m, &n1, A_L, ldA, ipiv_T, info); + if (*info) return; // apply pivots to A_R LAPACK(claswp)(&n2, A_R, ldA, iONE, &n1, ipiv_T, iONE); diff --git a/relapack/src/chegst.c b/relapack/src/chegst.c index fe77b03ea..8557c2952 100644 --- a/relapack/src/chegst.c +++ b/relapack/src/chegst.c @@ -40,6 +40,8 @@ void RELAPACK_chegst( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 
'L' : 'U'; diff --git a/relapack/src/chetrf_rook.c b/relapack/src/chetrf_rook.c index 3d2fa3216..9ed1261cf 100644 --- a/relapack/src/chetrf_rook.c +++ b/relapack/src/chetrf_rook.c @@ -36,7 +36,7 @@ void RELAPACK_chetrf_rook( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -56,7 +56,7 @@ void RELAPACK_chetrf_rook( if (*info) { const blasint minfo = -*info; - LAPACK(xerbla)("CHETRF", &minfo, strlen("CHETRF")); + LAPACK(xerbla)("CHETRF_ROOK", &minfo, strlen("CHETRF_ROOK")); return; } diff --git a/relapack/src/clauum.c b/relapack/src/clauum.c index 2bc93f182..58a14e7da 100644 --- a/relapack/src/clauum.c +++ b/relapack/src/clauum.c @@ -32,6 +32,8 @@ void RELAPACK_clauum( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; diff --git a/relapack/src/cpbtrf.c b/relapack/src/cpbtrf.c index 971e547c6..a0fa13850 100644 --- a/relapack/src/cpbtrf.c +++ b/relapack/src/cpbtrf.c @@ -35,6 +35,8 @@ void RELAPACK_cpbtrf( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; @@ -43,8 +45,8 @@ void RELAPACK_cpbtrf( // Allocate work space const blasint n1 = CREC_SPLIT(*n); - const blasint mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd; - const blasint nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd; + const blasint mWork = abs((*kd > n1) ? (lower ? *n - *kd : n1) : *kd); + const blasint nWork = abs((*kd > n1) ? (lower ? 
n1 : *n - *kd) : *kd); float *Work = malloc(mWork * nWork * 2 * sizeof(float)); LAPACK(claset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); @@ -64,7 +66,7 @@ static void RELAPACK_cpbtrf_rec( blasint *info ){ - if (*n <= MAX(CROSSOVER_CPBTRF, 1)) { + if (*n <= MAX(CROSSOVER_CPBTRF, 1) || *ldAb==1) { // Unblocked LAPACK(cpbtf2)(uplo, n, kd, Ab, ldAb, info); return; @@ -148,7 +150,7 @@ static void RELAPACK_cpbtrf_rec( } // recursion(A_BR) - if (*kd > n1) + if (*kd > n1 && ldA != 0) RELAPACK_cpotrf(uplo, &n2, A_BR, ldA, info); else RELAPACK_cpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); diff --git a/relapack/src/cpotrf.c b/relapack/src/cpotrf.c index 0f8e7ebb0..db06c6fef 100644 --- a/relapack/src/cpotrf.c +++ b/relapack/src/cpotrf.c @@ -32,6 +32,8 @@ void RELAPACK_cpotrf( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; @@ -46,6 +48,7 @@ static void RELAPACK_cpotrf_rec( float *A, const blasint *ldA, blasint *info ){ + if (*n == 0) return; if (*n <= MAX(CROSSOVER_CPOTRF, 1)) { // Unblocked diff --git a/relapack/src/csytrf.c b/relapack/src/csytrf.c index 2ebc31001..807c91ece 100644 --- a/relapack/src/csytrf.c +++ b/relapack/src/csytrf.c @@ -36,7 +36,7 @@ void RELAPACK_csytrf( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -67,6 +67,7 @@ void RELAPACK_csytrf( blasint nout; // Recursive kernel +if (*n != 0) RELAPACK_csytrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info); #if XSYTRF_ALLOW_MALLOC diff --git a/relapack/src/csytrf_rook.c b/relapack/src/csytrf_rook.c index e8a9865cc..105c6b8b6 100644 --- a/relapack/src/csytrf_rook.c +++ b/relapack/src/csytrf_rook.c @@ -36,7 +36,7 @@ void RELAPACK_csytrf_rook( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork 
< 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -56,7 +56,7 @@ void RELAPACK_csytrf_rook( if (*info) { const blasint minfo = -*info; - LAPACK(xerbla)("CSYTRF", &minfo, strlen("CSYTRF")); + LAPACK(xerbla)("CSYTRF_ROOK", &minfo, strlen("CSYTRF_ROOK")); return; } diff --git a/relapack/src/ctgsyl.c b/relapack/src/ctgsyl.c index 704f3ef23..632bbc14e 100644 --- a/relapack/src/ctgsyl.c +++ b/relapack/src/ctgsyl.c @@ -68,6 +68,13 @@ void RELAPACK_ctgsyl( return; } + if ( *m == 0 || *n == 0) { + *scale = 1.; + if (notran && (*ijob != 0)) + *dif = 0.; + return; + } + // Clean char * arguments const char cleantrans = notran ? 'N' : 'C'; diff --git a/relapack/src/ctrsyl.c b/relapack/src/ctrsyl.c index fed6e847e..f7b841cb0 100644 --- a/relapack/src/ctrsyl.c +++ b/relapack/src/ctrsyl.c @@ -47,6 +47,11 @@ void RELAPACK_ctrsyl( return; } + if (*m == 0 || *n == 0) { + *scale = 1.; + return; + } + // Clean char * arguments const char cleantranA = notransA ? 'N' : 'C'; const char cleantranB = notransB ? 'N' : 'C'; diff --git a/relapack/src/ctrtri.c b/relapack/src/ctrtri.c index 5201a24c7..8d736007b 100644 --- a/relapack/src/ctrtri.c +++ b/relapack/src/ctrtri.c @@ -36,6 +36,8 @@ void RELAPACK_ctrtri( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; const char cleandiag = nounit ? 'N' : 'U'; diff --git a/relapack/src/dgbtrf.c b/relapack/src/dgbtrf.c index cdf06ad5b..aac10f251 100644 --- a/relapack/src/dgbtrf.c +++ b/relapack/src/dgbtrf.c @@ -36,6 +36,8 @@ void RELAPACK_dgbtrf( return; } + if (*m == 0 || *n == 0) return; + // Constant const double ZERO[] = { 0. 
}; @@ -83,7 +85,7 @@ static void RELAPACK_dgbtrf_rec( blasint *info ) { - if (*n <= MAX(CROSSOVER_DGBTRF, 1)) { + if (*n <= MAX(CROSSOVER_DGBTRF, 1) || *n > *kl || *ldAb == 1) { // Unblocked LAPACK(dgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); return; @@ -195,6 +197,7 @@ static void RELAPACK_dgbtrf_rec( // Worku = A_TRr LAPACK(dlacpy)("L", &m1, &n22, A_TRr, ldA, Worku, ldWorku); // Worku = A_TL \ Worku + if (ldWorku <= 0) return; BLAS(dtrsm)("L", "L", "N", "U", &m1, &n22, ONE, A_TL, ldA, Worku, ldWorku); // A_TRr = Worku LAPACK(dlacpy)("L", &m1, &n22, Worku, ldWorku, A_TRr, ldA); diff --git a/relapack/src/dgetrf.c b/relapack/src/dgetrf.c index be960fde9..3ebfb18d2 100644 --- a/relapack/src/dgetrf.c +++ b/relapack/src/dgetrf.c @@ -29,15 +29,16 @@ void RELAPACK_dgetrf( return; } - const blasint sn = MIN(*m, *n); + if (*m == 0 || *n == 0) return; + const blasint sn = MIN(*m, *n); RELAPACK_dgetrf_rec(m, &sn, A, ldA, ipiv, info); // Right remainder if (*m < *n) { // Constants const double ONE[] = { 1. }; - const blasint iONE[] = { 1. }; + const blasint iONE[] = { 1 }; // Splitting const blasint rn = *n - *m; @@ -60,13 +61,11 @@ static void RELAPACK_dgetrf_rec( double *A, const blasint *ldA, blasint *ipiv, blasint *info ) { - - if (*n <= MAX(CROSSOVER_DGETRF, 1)) { + if ( *n <= MAX(CROSSOVER_DGETRF, 1)) { // Unblocked - LAPACK(dgetf2)(m, n, A, ldA, ipiv, info); + LAPACK(dgetrf2)(m, n, A, ldA, ipiv, info); return; } - // Constants const double ONE[] = { 1. }; const double MONE[] = { -1. 
}; @@ -95,6 +94,7 @@ static void RELAPACK_dgetrf_rec( // recursion(A_L, ipiv_T) RELAPACK_dgetrf_rec(m, &n1, A_L, ldA, ipiv_T, info); + if (*info) return; // apply pivots to A_R LAPACK(dlaswp)(&n2, A_R, ldA, iONE, &n1, ipiv_T, iONE); diff --git a/relapack/src/dpbtrf.c b/relapack/src/dpbtrf.c index 9380b28ad..94e9b80e2 100644 --- a/relapack/src/dpbtrf.c +++ b/relapack/src/dpbtrf.c @@ -35,6 +35,8 @@ void RELAPACK_dpbtrf( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; @@ -43,8 +45,8 @@ void RELAPACK_dpbtrf( // Allocate work space const blasint n1 = DREC_SPLIT(*n); - const blasint mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd; - const blasint nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd; + const blasint mWork = abs((*kd > n1) ? (lower ? *n - *kd : n1) : *kd); + const blasint nWork = abs((*kd > n1) ? (lower ? n1 : *n - *kd) : *kd); double *Work = malloc(mWork * nWork * sizeof(double)); LAPACK(dlaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); @@ -64,7 +66,7 @@ static void RELAPACK_dpbtrf_rec( blasint *info ){ - if (*n <= MAX(CROSSOVER_DPBTRF, 1)) { + if (*n <= MAX(CROSSOVER_DPBTRF, 1) || *ldAb == 1) { // Unblocked LAPACK(dpbtf2)(uplo, n, kd, Ab, ldAb, info); return; @@ -148,7 +150,7 @@ static void RELAPACK_dpbtrf_rec( } // recursion(A_BR) - if (*kd > n1) + if (*kd > n1 && ldA != 0) RELAPACK_dpotrf(uplo, &n2, A_BR, ldA, info); else RELAPACK_dpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); diff --git a/relapack/src/dsytrf.c b/relapack/src/dsytrf.c index 43d28f94e..ba869ad11 100644 --- a/relapack/src/dsytrf.c +++ b/relapack/src/dsytrf.c @@ -36,7 +36,7 @@ void RELAPACK_dsytrf( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -67,6 +67,7 @@ void RELAPACK_dsytrf( blasint nout; // Recursive kernel +if (*n != 0) 
RELAPACK_dsytrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info); #if XSYTRF_ALLOW_MALLOC diff --git a/relapack/src/dsytrf_rook.c b/relapack/src/dsytrf_rook.c index 78fa652ab..fcdc2809f 100644 --- a/relapack/src/dsytrf_rook.c +++ b/relapack/src/dsytrf_rook.c @@ -36,7 +36,7 @@ void RELAPACK_dsytrf_rook( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork <1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -56,7 +56,7 @@ void RELAPACK_dsytrf_rook( if (*info) { const blasint minfo = -*info; - LAPACK(xerbla)("DSYTRF", &minfo, strlen("DSYTRF")); + LAPACK(xerbla)("DSYTRF_ROOK", &minfo, strlen("DSYTRF_ROOK")); return; } diff --git a/relapack/src/dtrsyl.c b/relapack/src/dtrsyl.c index 766377300..4948c4977 100644 --- a/relapack/src/dtrsyl.c +++ b/relapack/src/dtrsyl.c @@ -49,6 +49,11 @@ void RELAPACK_dtrsyl( return; } + if (*m == 0 || *n == 0) { + *scale = 1.; + return; + } + // Clean char * arguments const char cleantranA = notransA ? 'N' : (transA ? 'T' : 'C'); const char cleantranB = notransB ? 'N' : (transB ? 
'T' : 'C'); diff --git a/relapack/src/lapack.h b/relapack/src/lapack.h index 776b0589f..9e9cdff7e 100644 --- a/relapack/src/lapack.h +++ b/relapack/src/lapack.h @@ -4,6 +4,13 @@ extern blasint LAPACK(lsame)(const char *, const char *); extern blasint LAPACK(xerbla)(const char *, const blasint *, int); +extern const blasint LAPACK(ilaenv)(const blasint *, const char*, const char*, const blasint* , int , int, int ); + +extern void LAPACK(sgetrf2)(const blasint *, const blasint *, float *, const blasint *, blasint *, blasint *); +extern void LAPACK(dgetrf2)(const blasint *, const blasint *, double *, const blasint *, blasint *, blasint *); +extern void LAPACK(cgetrf2)(const blasint *, const blasint *, float *, const blasint *, blasint *, blasint *); +extern void LAPACK(zgetrf2)(const blasint *, const blasint *, double *, const blasint *, blasint *, blasint *); + extern void LAPACK(slaswp)(const blasint *, float *, const blasint *, const blasint *, const blasint *, const blasint *, const blasint *); extern void LAPACK(dlaswp)(const blasint *, double *, const blasint *, const blasint *, const blasint *, const blasint *, const blasint *); extern void LAPACK(claswp)(const blasint *, float *, const blasint *, const blasint *, const blasint *, const blasint *, const blasint *); diff --git a/relapack/src/sgbtrf.c b/relapack/src/sgbtrf.c index 3e3fdf455..76e84e671 100644 --- a/relapack/src/sgbtrf.c +++ b/relapack/src/sgbtrf.c @@ -35,6 +35,13 @@ void RELAPACK_sgbtrf( return; } + if (*m == 0 || *n == 0) return; + + if (*ldAb == 1) { + LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); + return; + } + // Constant const float ZERO[] = { 0. 
}; @@ -82,8 +89,9 @@ static void RELAPACK_sgbtrf_rec( blasint *info ) { + if (*m == 0 || *n == 0) return; - if (*n <= MAX(CROSSOVER_SGBTRF, 1)) { + if ( *n <= MAX(CROSSOVER_SGBTRF, 1) || *n > *kl || *ldAb == 1) { // Unblocked LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); return; @@ -160,7 +168,7 @@ static void RELAPACK_sgbtrf_rec( // recursion(Ab_L, ipiv_T) RELAPACK_sgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info); - + if (*info) return; // Workl = A_BLb LAPACK(slacpy)("U", &m22, &n1, A_BLb, ldA, Workl, ldWorkl); @@ -222,8 +230,8 @@ static void RELAPACK_sgbtrf_rec( // recursion(Ab_BR, ipiv_B) //cause of infinite recursion here ? -// RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); - LAPACK(sgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info); + RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); +// LAPACK(sgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info); if (*info) *info += n1; // shift pivots diff --git a/relapack/src/sgetrf.c b/relapack/src/sgetrf.c index 0231cc166..a0c7015fd 100644 --- a/relapack/src/sgetrf.c +++ b/relapack/src/sgetrf.c @@ -14,7 +14,6 @@ void RELAPACK_sgetrf( float *A, const blasint *ldA, blasint *ipiv, blasint *info ) { - // Check arguments *info = 0; if (*m < 0) @@ -28,6 +27,9 @@ void RELAPACK_sgetrf( LAPACK(xerbla)("SGETRF", &minfo, strlen("SGETRF")); return; } + + if (*m == 0 || *n == 0) return; + const blasint sn = MIN(*m, *n); RELAPACK_sgetrf_rec(m, &sn, A, ldA, ipiv, info); @@ -35,7 +37,7 @@ void RELAPACK_sgetrf( if (*m < *n) { // Constants const float ONE[] = { 1. }; - const blasint iONE[] = { 1. 
}; + const blasint iONE[] = { 1 }; // Splitting const blasint rn = *n - *m; @@ -58,9 +60,12 @@ static void RELAPACK_sgetrf_rec( float *A, const blasint *ldA, blasint *ipiv, blasint *info ) { - if (*n <= MAX(CROSSOVER_SGETRF, 1)) { + + if (*m == 0 || *n == 0) return; + + if ( *n <= MAX(CROSSOVER_SGETRF, 1)) { // Unblocked - LAPACK(sgetf2)(m, n, A, ldA, ipiv, info); + LAPACK(sgetrf2)(m, n, A, ldA, ipiv, info); return; } @@ -91,6 +96,8 @@ static void RELAPACK_sgetrf_rec( // recursion(A_L, ipiv_T) RELAPACK_sgetrf_rec(m, &n1, A_L, ldA, ipiv_T, info); + if (*info) + return; // apply pivots to A_R LAPACK(slaswp)(&n2, A_R, ldA, iONE, &n1, ipiv_T, iONE); diff --git a/relapack/src/spbtrf.c b/relapack/src/spbtrf.c index 26804dcc2..330276312 100644 --- a/relapack/src/spbtrf.c +++ b/relapack/src/spbtrf.c @@ -35,6 +35,9 @@ void RELAPACK_spbtrf( return; } + + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; @@ -43,8 +46,8 @@ void RELAPACK_spbtrf( // Allocate work space const blasint n1 = SREC_SPLIT(*n); - const blasint mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd; - const blasint nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd; + const blasint mWork = abs( (*kd > n1) ? (lower ? *n - *kd : n1) : *kd); + const blasint nWork = abs((*kd > n1) ? (lower ? 
n1 : *n - *kd) : *kd); float *Work = malloc(mWork * nWork * sizeof(float)); LAPACK(slaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); @@ -64,7 +67,9 @@ static void RELAPACK_spbtrf_rec( blasint *info ){ - if (*n <= MAX(CROSSOVER_SPBTRF, 1)) { + if (*n == 0 ) return; + + if ( *n <= MAX(CROSSOVER_SPBTRF, 1) || *ldAb == 1) { // Unblocked LAPACK(spbtf2)(uplo, n, kd, Ab, ldAb, info); return; @@ -148,7 +153,7 @@ static void RELAPACK_spbtrf_rec( } // recursion(A_BR) - if (*kd > n1) + if (*kd > n1 && ldA != 0) RELAPACK_spotrf(uplo, &n2, A_BR, ldA, info); else RELAPACK_spbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); diff --git a/relapack/src/ssytrf.c b/relapack/src/ssytrf.c index 9fe7ce4a6..5f8e03391 100644 --- a/relapack/src/ssytrf.c +++ b/relapack/src/ssytrf.c @@ -35,7 +35,7 @@ void RELAPACK_ssytrf( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork <1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -66,6 +66,7 @@ void RELAPACK_ssytrf( blasint nout; // Recursive kernel +if (*n != 0) RELAPACK_ssytrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info); #if XSYTRF_ALLOW_MALLOC diff --git a/relapack/src/ssytrf_rook.c b/relapack/src/ssytrf_rook.c index abcf29d1c..b40f12271 100644 --- a/relapack/src/ssytrf_rook.c +++ b/relapack/src/ssytrf_rook.c @@ -36,7 +36,7 @@ void RELAPACK_ssytrf_rook( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 ||*lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -56,7 +56,7 @@ void RELAPACK_ssytrf_rook( if (*info) { const blasint minfo = -*info; - LAPACK(xerbla)("SSYTRF", &minfo, strlen("SSYTRF")); + LAPACK(xerbla)("SSYTRF_ROOK", &minfo, strlen("SSYTRF_ROOK")); return; } @@ -67,6 +67,7 @@ void RELAPACK_ssytrf_rook( blasint nout; // Recursive kernel +if (*n != 0) 
RELAPACK_ssytrf_rook_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info); #if XSYTRF_ALLOW_MALLOC diff --git a/relapack/src/strsyl.c b/relapack/src/strsyl.c index 012fb3548..d85963fcc 100644 --- a/relapack/src/strsyl.c +++ b/relapack/src/strsyl.c @@ -49,6 +49,11 @@ void RELAPACK_strsyl( return; } + if (*m == 0 || *n == 0) { + *scale = 1.; + return; + } + // Clean char * arguments const char cleantranA = notransA ? 'N' : (transA ? 'T' : 'C'); const char cleantranB = notransB ? 'N' : (transB ? 'T' : 'C'); diff --git a/relapack/src/zgbtrf.c b/relapack/src/zgbtrf.c index d4ba41753..5d7dfd3c7 100644 --- a/relapack/src/zgbtrf.c +++ b/relapack/src/zgbtrf.c @@ -36,6 +36,8 @@ void RELAPACK_zgbtrf( return; } + if (*m == 0 || *n == 0) return; + // Constant const double ZERO[] = { 0., 0. }; @@ -82,7 +84,7 @@ static void RELAPACK_zgbtrf_rec( blasint *info ) { - if (*n <= MAX(CROSSOVER_ZGBTRF, 1)) { + if (*n <= MAX(CROSSOVER_ZGBTRF, 1) || *n > *kl || *ldAb == 1) { // Unblocked LAPACK(zgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); return; @@ -92,6 +94,7 @@ static void RELAPACK_zgbtrf_rec( const double ONE[] = { 1., 0. }; const double MONE[] = { -1., 0. 
}; const blasint iONE[] = { 1 }; + const blasint min11 = -11; // Loop iterators blasint i, j; @@ -158,6 +161,7 @@ static void RELAPACK_zgbtrf_rec( // recursion(Ab_L, ipiv_T) RELAPACK_zgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info); +if (*info) return; // Workl = A_BLb LAPACK(zlacpy)("U", &m22, &n1, A_BLb, ldA, Workl, ldWorkl); @@ -193,11 +197,21 @@ static void RELAPACK_zgbtrf_rec( } // A_TRl = A_TL \ A_TRl + if (*ldA < MAX(1,m1)) { + LAPACK(xerbla)("ZGBTRF", &min11, strlen("ZGBTRF")); + return; + } else { BLAS(ztrsm)("L", "L", "N", "U", &m1, &n21, ONE, A_TL, ldA, A_TRl, ldA); + } // Worku = A_TRr LAPACK(zlacpy)("L", &m1, &n22, A_TRr, ldA, Worku, ldWorku); // Worku = A_TL \ Worku + if (*ldWorku < MAX(1,m1)) { + LAPACK(xerbla)("ZGBTRF", &min11, strlen("ZGBTRF")); + return; + } else { BLAS(ztrsm)("L", "L", "N", "U", &m1, &n22, ONE, A_TL, ldA, Worku, ldWorku); + } // A_TRr = Worku LAPACK(zlacpy)("L", &m1, &n22, Worku, ldWorku, A_TRr, ldA); // A_BRtl = A_BRtl - A_BLt * A_TRl diff --git a/relapack/src/zgetrf.c b/relapack/src/zgetrf.c index b0d14ffb1..8c3e8a8e8 100644 --- a/relapack/src/zgetrf.c +++ b/relapack/src/zgetrf.c @@ -30,6 +30,7 @@ void RELAPACK_zgetrf( return; } + if (*m == 0 || *n == 0) return; const blasint sn = MIN(*m, *n); RELAPACK_zgetrf_rec(m, &sn, A, ldA, ipiv, info); @@ -62,9 +63,11 @@ static void RELAPACK_zgetrf_rec( blasint *info ) { - if (*n <= MAX(CROSSOVER_ZGETRF, 1)) { + if (*m == 0 || *n == 0) return; + + if ( *n <= MAX(CROSSOVER_ZGETRF, 1)) { // Unblocked - LAPACK(zgetf2)(m, n, A, ldA, ipiv, info); + LAPACK(zgetrf2)(m, n, A, ldA, ipiv, info); return; } @@ -96,6 +99,8 @@ static void RELAPACK_zgetrf_rec( // recursion(A_L, ipiv_T) RELAPACK_zgetrf_rec(m, &n1, A_L, ldA, ipiv_T, info); +if (*info) return; + // apply pivots to A_R LAPACK(zlaswp)(&n2, A_R, ldA, iONE, &n1, ipiv_T, iONE); diff --git a/relapack/src/zhetrf_rook.c b/relapack/src/zhetrf_rook.c index 285aea96e..605e3a77f 100644 --- 
a/relapack/src/zhetrf_rook.c +++ b/relapack/src/zhetrf_rook.c @@ -36,7 +36,7 @@ void RELAPACK_zhetrf_rook( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -56,7 +56,7 @@ void RELAPACK_zhetrf_rook( if (*info) { const blasint minfo = -*info; - LAPACK(xerbla)("ZHETRF", &minfo, strlen("ZHETRF")); + LAPACK(xerbla)("ZHETRF_ROOK", &minfo, strlen("ZHETRF_ROOK")); return; } diff --git a/relapack/src/zpbtrf.c b/relapack/src/zpbtrf.c index fb0e1e97b..8b094380c 100644 --- a/relapack/src/zpbtrf.c +++ b/relapack/src/zpbtrf.c @@ -35,6 +35,8 @@ void RELAPACK_zpbtrf( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; @@ -43,9 +45,10 @@ void RELAPACK_zpbtrf( // Allocate work space const blasint n1 = ZREC_SPLIT(*n); - const blasint mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd; - const blasint nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd; + const blasint mWork = abs((*kd > n1) ? (lower ? *n - *kd : n1) : *kd); + const blasint nWork = abs((*kd > n1) ? (lower ? 
n1 : *n - *kd) : *kd); double *Work = malloc(mWork * nWork * 2 * sizeof(double)); + LAPACK(zlaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); // Recursive kernel @@ -64,7 +67,7 @@ static void RELAPACK_zpbtrf_rec( blasint *info ){ - if (*n <= MAX(CROSSOVER_ZPBTRF, 1)) { + if (*n <= MAX(CROSSOVER_ZPBTRF, 1) || *ldAb == 1) { // Unblocked LAPACK(zpbtf2)(uplo, n, kd, Ab, ldAb, info); return; @@ -148,7 +151,7 @@ static void RELAPACK_zpbtrf_rec( } // recursion(A_BR) - if (*kd > n1) + if (*kd > n1 && ldA != 0) RELAPACK_zpotrf(uplo, &n2, A_BR, ldA, info); else RELAPACK_zpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); diff --git a/relapack/src/zsytrf.c b/relapack/src/zsytrf.c index f3412ad8f..59daba02f 100644 --- a/relapack/src/zsytrf.c +++ b/relapack/src/zsytrf.c @@ -36,7 +36,7 @@ void RELAPACK_zsytrf( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -67,6 +67,7 @@ void RELAPACK_zsytrf( blasint nout; // Recursive kernel + if (*n != 0) RELAPACK_zsytrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info); #if XSYTRF_ALLOW_MALLOC diff --git a/relapack/src/zsytrf_rook.c b/relapack/src/zsytrf_rook.c index fc6d73645..0fd8e7033 100644 --- a/relapack/src/zsytrf_rook.c +++ b/relapack/src/zsytrf_rook.c @@ -36,7 +36,7 @@ void RELAPACK_zsytrf_rook( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -56,7 +56,7 @@ void RELAPACK_zsytrf_rook( if (*info) { const blasint minfo = -*info; - LAPACK(xerbla)("ZSYTRF", &minfo, strlen("ZSYTRF")); + LAPACK(xerbla)("ZSYTRF_ROOK", &minfo, strlen("ZSYTRF_ROOK")); return; } @@ -67,6 +67,7 @@ void RELAPACK_zsytrf_rook( blasint nout; // Recursive kernel + if (*n != 0) 
RELAPACK_zsytrf_rook_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info); #if XSYTRF_ALLOW_MALLOC diff --git a/relapack/src/ztrsyl.c b/relapack/src/ztrsyl.c index 567ef115a..9d0107526 100644 --- a/relapack/src/ztrsyl.c +++ b/relapack/src/ztrsyl.c @@ -47,6 +47,11 @@ void RELAPACK_ztrsyl( return; } + if (*m == 0 || *n == 0) { + *scale = 1.; + return; + } + // Clean char * arguments const char cleantranA = notransA ? 'N' : 'C'; const char cleantranB = notransB ? 'N' : 'C'; diff --git a/relapack/src/ztrtri.c b/relapack/src/ztrtri.c index 3f6606d84..54854f525 100644 --- a/relapack/src/ztrtri.c +++ b/relapack/src/ztrtri.c @@ -69,8 +69,8 @@ static void RELAPACK_ztrtri_rec( } // Constants - const double ONE[] = { 1. }; - const double MONE[] = { -1. }; + const double ONE[] = { 1., 0. }; + const double MONE[] = { -1. , 0. }; // Splitting const blasint n1 = ZREC_SPLIT(*n);