diff --git a/.github/workflows/mips64.yml b/.github/workflows/mips64.yml new file mode 100644 index 000000000..a5bd7b84b --- /dev/null +++ b/.github/workflows/mips64.yml @@ -0,0 +1,114 @@ +name: mips64 qemu test + +on: [push, pull_request] + +jobs: + TEST: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + - target: MIPS64_GENERIC + triple: mips64el-linux-gnuabi64 + opts: NO_SHARED=1 TARGET=MIPS64_GENERIC + - target: SICORTEX + triple: mips64el-linux-gnuabi64 + opts: NO_SHARED=1 TARGET=SICORTEX + - target: I6400 + triple: mipsisa64r6el-linux-gnuabi64 + opts: NO_SHARED=1 TARGET=I6400 + - target: P6600 + triple: mipsisa64r6el-linux-gnuabi64 + opts: NO_SHARED=1 TARGET=P6600 + - target: I6500 + triple: mipsisa64r6el-linux-gnuabi64 + opts: NO_SHARED=1 TARGET=I6500 + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: install build deps + run: | + sudo apt-get update + sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \ + gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-mips64el-cross + + - name: checkout qemu + uses: actions/checkout@v3 + with: + repository: qemu/qemu + path: qemu + ref: 79dfa177ae348bb5ab5f97c0915359b13d6186e2 + + - name: build qemu + run: | + cd qemu + ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=mips64el-linux-user --disable-system + make -j$(nproc) + make install + + - name: Compilation cache + uses: actions/cache@v3 + with: + path: ~/.ccache + key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }} + restore-keys: | + ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }} + ccache-${{ runner.os }}-${{ matrix.target }} + + - name: Configure ccache + run: | + test -d ~/.ccache || mkdir -p ~/.ccache + echo "max_size = 300M" > ~/.ccache/ccache.conf + echo "compression = true" >> ~/.ccache/ccache.conf + ccache -s + + - name: build OpenBLAS + run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc) + + - name: test + run: | + export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH + qemu-mips64el ./utest/openblas_utest + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat1 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat1 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat1 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat1 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat2 < ./ctest/sin2 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat2 < ./ctest/din2 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat2 < ./ctest/cin2 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat2 < ./ctest/zin2 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat3 < ./ctest/sin3 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat3 < ./ctest/din3 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat3 < ./ctest/cin3 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat3 < ./ctest/zin3 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat1 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat1 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat1 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat1 + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat1 + rm -f ./test/?BLAT2.SUMM + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat + rm -f ./test/?BLAT2.SUMM + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat + rm -f ./test/?BLAT3.SUMM + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat + rm -f ./test/?BLAT3.SUMM + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat + OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat diff --git a/cmake/system.cmake b/cmake/system.cmake index a9fc0f4b7..fd68f79d6 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -197,14 +197,14 @@ if (DEFINED TARGET) if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512) set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") endif() - if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2) + if ((${TARGET} STREQUAL HASWELL OR ${TARGET} STREQUAL ZEN) AND NOT NO_AVX2) if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7) set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") endif() elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2 -mfma") endif() endif() if (DEFINED HAVE_AVX) diff --git a/common.h b/common.h index e6002d322..4eeeb8d55 100644 --- a/common.h +++ b/common.h @@ -387,6 +387,10 @@ typedef int blasint; #endif */ +#ifdef __EMSCRIPTEN__ +#define YIELDING +#endif + #ifndef YIELDING #define YIELDING sched_yield() #endif diff --git a/ctest.c b/ctest.c index df628b1d4..2ccae8dcc 100644 --- a/ctest.c +++ b/ctest.c @@ -173,3 +173,8 @@ HAVE_C11 ARCH_E2K #endif +#if defined(__EMSCRIPTEN__) +ARCH_RISCV64 +OS_WINDOWS +#endif + diff --git a/ctest/c_sblat1c.c b/ctest/c_sblat1c.c index 4993d31bb..57e4707a9 100644 --- a/ctest/c_sblat1c.c +++ b/ctest/c_sblat1c.c @@ -969,7 +969,7 @@ real *sfac; 1.17 }; /* Local variables */ - extern /* Subroutine */ srottest_(); + extern /* Subroutine */ void srottest_(); static integer i__, k, ksize; extern /* Subroutine */ int stest_(), srotmtest_(); static integer ki, kn; diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index 1a5fd06a3..c158f92ee 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -69,6 +69,8 @@ int blas_server_avail = 0; +extern int openblas_omp_adaptive_env(); + static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER]; #ifdef HAVE_C11 static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; diff --git a/kernel/Makefile b/kernel/Makefile index cbe4cde6e..977886044 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -23,7 +23,7 @@ ifeq ($(C_COMPILER), CLANG) # Any clang posing as gcc 4.2 should be new enough (3.4 or later) GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2) ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111)) - AVX2OPT = -mavx2 + AVX2OPT = -mavx2 -mfma endif endif ifdef NO_AVX2 @@ -73,6 +73,8 @@ else ifeq ($(TARGET_CORE), SKYLAKEX) endif else ifeq ($(TARGET_CORE), HASWELL) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT) +else ifeq ($(TARGET_CORE), ZEN) + override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT) else ifeq ($(TARGET_CORE), LOONGSON3R4) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS) else diff --git a/kernel/mips/sdot_msa.c b/kernel/mips/sdot_msa.c index e02e10c61..8c250d401 100644 --- a/kernel/mips/sdot_msa.c +++ b/kernel/mips/sdot_msa.c @@ -39,10 +39,19 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) FLOAT x0, x1, x2, x3, y0, y1, y2, y3; v4f32 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; v4f32 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; +#if defined(DSDOT) + v2f64 dvx0, dvx1, dvx2, dvx3, dvx4, dvx5, dvx6, dvx7; + v2f64 dvy0, dvy1, dvy2, dvy3, dvy4, dvy5, dvy6, dvy7; + v2f64 dot0 = {0, 0}; + v2f64 dot1 = {0, 0}; + v2f64 dot2 = {0, 0}; + v2f64 dot3 = {0, 0}; +#else v4f32 dot0 = {0, 0, 0, 0}; v4f32 dot1 = {0, 0, 0, 0}; v4f32 dot2 = {0, 0, 0, 0}; v4f32 dot3 = {0, 0, 0, 0}; +#endif if (n < 1) return (dot); @@ -83,6 +92,61 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) x_pref += 32; y_pref += 32; +#if defined(DSDOT) + /* Extend single precision to double precision */ + dvy0 = __msa_fexupr_d(vy0); + dvy1 = __msa_fexupr_d(vy1); + dvy2 = __msa_fexupr_d(vy2); + dvy3 = __msa_fexupr_d(vy3); + dvy4 = __msa_fexupr_d(vy4); + dvy5 = __msa_fexupr_d(vy5); + dvy6 = __msa_fexupr_d(vy6); + dvy7 = __msa_fexupr_d(vy7); + + vy0 = (v4f32)__msa_fexupl_d(vy0); + vy1 = (v4f32)__msa_fexupl_d(vy1); + vy2 = (v4f32)__msa_fexupl_d(vy2); + vy3 = (v4f32)__msa_fexupl_d(vy3); + vy4 = (v4f32)__msa_fexupl_d(vy4); + vy5 = (v4f32)__msa_fexupl_d(vy5); + vy6 = (v4f32)__msa_fexupl_d(vy6); + vy7 = (v4f32)__msa_fexupl_d(vy7); + + dvx0 = __msa_fexupr_d(vx0); + dvx1 = __msa_fexupr_d(vx1); + dvx2 = __msa_fexupr_d(vx2); + dvx3 = __msa_fexupr_d(vx3); + dvx4 = __msa_fexupr_d(vx4); + dvx5 = __msa_fexupr_d(vx5); + dvx6 = __msa_fexupr_d(vx6); + dvx7 = __msa_fexupr_d(vx7); + + vx0 = (v4f32)__msa_fexupl_d(vx0); + vx1 = (v4f32)__msa_fexupl_d(vx1); + vx2 = (v4f32)__msa_fexupl_d(vx2); + vx3 = (v4f32)__msa_fexupl_d(vx3); + vx4 = (v4f32)__msa_fexupl_d(vx4); + vx5 = (v4f32)__msa_fexupl_d(vx5); + vx6 = (v4f32)__msa_fexupl_d(vx6); + vx7 = (v4f32)__msa_fexupl_d(vx7); + + dot0 += (dvy0 * dvx0); + dot1 += (dvy1 * dvx1); + dot2 += (dvy2 * dvx2); + dot3 += (dvy3 * dvx3); + dot0 += (dvy4 * dvx4); + dot1 += (dvy5 * dvx5); + dot2 += (dvy6 * dvx6); + dot3 += (dvy7 * dvx7); + dot0 += ((v2f64)vy0 * (v2f64)vx0); + dot1 += ((v2f64)vy1 * (v2f64)vx1); + dot2 += ((v2f64)vy2 * (v2f64)vx2); + dot3 += ((v2f64)vy3 * (v2f64)vx3); + dot0 += ((v2f64)vy4 * (v2f64)vx4); + dot1 += ((v2f64)vy5 * (v2f64)vx5); + dot2 += ((v2f64)vy6 * (v2f64)vx6); + dot3 += ((v2f64)vy7 * (v2f64)vx7); +#else dot0 += (vy0 * vx0); dot1 += (vy1 * vx1); dot2 += (vy2 * vx2); @@ -91,6 +155,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) dot1 += (vy5 * vx5); dot2 += (vy6 * vx6); dot3 += (vy7 * vx7); +#endif } if (n & 31) @@ -100,10 +165,41 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) LD_SP4_INC(x, 4, vx0, vx1, vx2, vx3); LD_SP4_INC(y, 4, vy0, vy1, vy2, vy3); +#if defined(DSDOT) + dvy0 = __msa_fexupr_d(vy0); + dvy1 = __msa_fexupr_d(vy1); + dvy2 = __msa_fexupr_d(vy2); + dvy3 = __msa_fexupr_d(vy3); + + vy0 = (v4f32)__msa_fexupl_d(vy0); + vy1 = (v4f32)__msa_fexupl_d(vy1); + vy2 = (v4f32)__msa_fexupl_d(vy2); + vy3 = (v4f32)__msa_fexupl_d(vy3); + + dvx0 = __msa_fexupr_d(vx0); + dvx1 = __msa_fexupr_d(vx1); + dvx2 = __msa_fexupr_d(vx2); + dvx3 = __msa_fexupr_d(vx3); + + vx0 = (v4f32)__msa_fexupl_d(vx0); + vx1 = (v4f32)__msa_fexupl_d(vx1); + vx2 = (v4f32)__msa_fexupl_d(vx2); + vx3 = (v4f32)__msa_fexupl_d(vx3); + + dot0 += (dvy0 * dvx0); + dot1 += (dvy1 * dvx1); + dot2 += (dvy2 * dvx2); + dot3 += (dvy3 * dvx3); + dot0 += ((v2f64)vy0 * (v2f64)vx0); + dot1 += ((v2f64)vy1 * (v2f64)vx1); + dot2 += ((v2f64)vy2 * (v2f64)vx2); + dot3 += ((v2f64)vy3 * (v2f64)vx3); +#else dot0 += (vy0 * vx0); dot1 += (vy1 * vx1); dot2 += (vy2 * vx2); dot3 += (vy3 * vx3); +#endif } if (n & 8) @@ -111,8 +207,27 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) LD_SP2_INC(x, 4, vx0, vx1); LD_SP2_INC(y, 4, vy0, vy1); +#if defined(DSDOT) + dvy0 = __msa_fexupr_d(vy0); + dvy1 = __msa_fexupr_d(vy1); + + vy0 = (v4f32)__msa_fexupl_d(vy0); + vy1 = (v4f32)__msa_fexupl_d(vy1); + + dvx0 = __msa_fexupr_d(vx0); + dvx1 = __msa_fexupr_d(vx1); + + vx0 = (v4f32)__msa_fexupl_d(vx0); + vx1 = (v4f32)__msa_fexupl_d(vx1); + + dot0 += (dvy0 * dvx0); + dot1 += (dvy1 * dvx1); + dot0 += ((v2f64)vy0 * (v2f64)vx0); + dot1 += ((v2f64)vy1 * (v2f64)vx1); +#else dot0 += (vy0 * vx0); dot1 += (vy1 * vx1); +#endif } if (n & 4) @@ -120,7 +235,16 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) vx0 = LD_SP(x); x += 4; vy0 = LD_SP(y); y += 4; +#if defined(DSDOT) + dvy0 = __msa_fexupr_d(vy0); + vy0 = (v4f32)__msa_fexupl_d(vy0); + dvx0 = __msa_fexupr_d(vx0); + vx0 = (v4f32)__msa_fexupl_d(vx0); + dot0 += (dvy0 * dvx0); + dot0 += ((v2f64)vy0 * (v2f64)vx0); +#else dot0 += (vy0 * vx0); +#endif } if (n & 2) @@ -128,8 +252,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) LD_GP2_INC(x, 1, x0, x1); LD_GP2_INC(y, 1, y0, y1); +#if defined(DSDOT) + dot += ((double)y0 * (double)x0); + dot += ((double)y1 * (double)x1); +#else dot += (y0 * x0); dot += (y1 * x1); +#endif } if (n & 1) @@ -137,7 +266,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) x0 = *x; y0 = *y; +#if defined(DSDOT) + dot += ((double)y0 * (double)x0); +#else dot += (y0 * x0); +#endif } } @@ -145,8 +278,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) dot += dot0[0]; dot += dot0[1]; +#if !defined(DSDOT) dot += dot0[2]; dot += dot0[3]; +#endif } else { @@ -155,10 +290,17 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) LD_GP4_INC(x, inc_x, x0, x1, x2, x3); LD_GP4_INC(y, inc_y, y0, y1, y2, y3); +#if defined(DSDOT) + dot += ((double)y0 * (double)x0); + dot += ((double)y1 * (double)x1); + dot += ((double)y2 * (double)x2); + dot += ((double)y3 * (double)x3); +#else dot += (y0 * x0); dot += (y1 * x1); dot += (y2 * x2); dot += (y3 * x3); +#endif } if (n & 2) @@ -166,8 +308,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) LD_GP2_INC(x, inc_x, x0, x1); LD_GP2_INC(y, inc_y, y0, y1); +#if defined(DSDOT) + dot += ((double)y0 * (double)x0); + dot += ((double)y1 * (double)x1); +#else dot += (y0 * x0); dot += (y1 * x1); +#endif } if (n & 1) @@ -175,7 +322,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) x0 = *x; y0 = *y; +#if defined(DSDOT) + dot += ((double)y0 * (double)x0); +#else dot += (y0 * x0); +#endif } } diff --git a/kernel/mips64/KERNEL.MIPS64_GENERIC b/kernel/mips64/KERNEL.MIPS64_GENERIC new file mode 100644 index 000000000..17f2ef976 --- /dev/null +++ b/kernel/mips64/KERNEL.MIPS64_GENERIC @@ -0,0 +1,160 @@ +SGEMM_BETA = ../generic/gemm_beta.c +DGEMM_BETA = ../generic/gemm_beta.c +CGEMM_BETA = ../generic/zgemm_beta.c +ZGEMM_BETA = ../generic/zgemm_beta.c + +STRMMKERNEL = ../generic/trmmkernel_2x2.c +DTRMMKERNEL = ../generic/trmmkernel_2x2.c +CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c +ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c + +SGEMMKERNEL = ../generic/gemmkernel_2x2.c +SGEMMONCOPY = ../generic/gemm_ncopy_2.c +SGEMMOTCOPY = ../generic/gemm_tcopy_2.c +SGEMMONCOPYOBJ = sgemm_oncopy.o +SGEMMOTCOPYOBJ = sgemm_otcopy.o + +DGEMMKERNEL = ../generic/gemmkernel_2x2.c +DGEMMONCOPY = ../generic/gemm_ncopy_2.c +DGEMMOTCOPY = ../generic/gemm_tcopy_2.c +DGEMMONCOPYOBJ = dgemm_oncopy.o +DGEMMOTCOPYOBJ = dgemm_otcopy.o + +CGEMMKERNEL = ../generic/zgemmkernel_2x2.c +CGEMMONCOPY = ../generic/zgemm_ncopy_2.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +CGEMMONCOPYOBJ = cgemm_oncopy.o +CGEMMOTCOPYOBJ = cgemm_otcopy.o + +ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c +ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMONCOPYOBJ = zgemm_oncopy.o +ZGEMMOTCOPYOBJ = zgemm_otcopy.o + +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +#Pure C for other kernels +SAMAXKERNEL = ../mips/amax.c +DAMAXKERNEL = ../mips/amax.c +CAMAXKERNEL = ../mips/zamax.c +ZAMAXKERNEL = ../mips/zamax.c + +SAMINKERNEL = ../mips/amin.c +DAMINKERNEL = ../mips/amin.c +CAMINKERNEL = ../mips/zamin.c +ZAMINKERNEL = ../mips/zamin.c + +SMAXKERNEL = ../mips/max.c +DMAXKERNEL = ../mips/max.c + +SMINKERNEL = ../mips/min.c +DMINKERNEL = ../mips/min.c + +ISAMAXKERNEL = ../mips/iamax.c +IDAMAXKERNEL = ../mips/iamax.c +ICAMAXKERNEL = ../mips/izamax.c +IZAMAXKERNEL = ../mips/izamax.c + +ISAMINKERNEL = ../mips/iamin.c +IDAMINKERNEL = ../mips/iamin.c +ICAMINKERNEL = ../mips/izamin.c +IZAMINKERNEL = ../mips/izamin.c + +ISMAXKERNEL = ../mips/imax.c +IDMAXKERNEL = ../mips/imax.c + +ISMINKERNEL = ../mips/imin.c +IDMINKERNEL = ../mips/imin.c + +SASUMKERNEL = ../mips/asum.c +DASUMKERNEL = ../mips/asum.c +CASUMKERNEL = ../mips/zasum.c +ZASUMKERNEL = ../mips/zasum.c + +SSUMKERNEL = ../mips/sum.c +DSUMKERNEL = ../mips/sum.c +CSUMKERNEL = ../mips/zsum.c +ZSUMKERNEL = ../mips/zsum.c + +SAXPYKERNEL = ../mips/axpy.c +DAXPYKERNEL = ../mips/axpy.c +CAXPYKERNEL = ../mips/zaxpy.c +ZAXPYKERNEL = ../mips/zaxpy.c + +SCOPYKERNEL = ../mips/copy.c +DCOPYKERNEL = ../mips/copy.c +CCOPYKERNEL = ../mips/zcopy.c +ZCOPYKERNEL = ../mips/zcopy.c + +SDOTKERNEL = ../mips/dot.c +DDOTKERNEL = ../mips/dot.c +CDOTKERNEL = ../mips/zdot.c +ZDOTKERNEL = ../mips/zdot.c + +SNRM2KERNEL = ../mips/nrm2.c +DNRM2KERNEL = ../mips/nrm2.c +CNRM2KERNEL = ../mips/znrm2.c +ZNRM2KERNEL = ../mips/znrm2.c + +SROTKERNEL = ../mips/rot.c +DROTKERNEL = ../mips/rot.c +CROTKERNEL = ../mips/zrot.c +ZROTKERNEL = ../mips/zrot.c + +SSCALKERNEL = ../mips/scal.c +DSCALKERNEL = ../mips/scal.c +CSCALKERNEL = ../mips/zscal.c +ZSCALKERNEL = ../mips/zscal.c + +SSWAPKERNEL = ../mips/swap.c +DSWAPKERNEL = ../mips/swap.c +CSWAPKERNEL = ../mips/zswap.c +ZSWAPKERNEL = ../mips/zswap.c + +SGEMVNKERNEL = ../mips/gemv_n.c +DGEMVNKERNEL = ../mips/gemv_n.c +CGEMVNKERNEL = ../mips/zgemv_n.c +ZGEMVNKERNEL = ../mips/zgemv_n.c + +SGEMVTKERNEL = ../mips/gemv_t.c +DGEMVTKERNEL = ../mips/gemv_t.c +CGEMVTKERNEL = ../mips/zgemv_t.c +ZGEMVTKERNEL = ../mips/zgemv_t.c + +SSYMV_U_KERNEL = ../generic/symv_k.c +SSYMV_L_KERNEL = ../generic/symv_k.c +DSYMV_U_KERNEL = ../generic/symv_k.c +DSYMV_L_KERNEL = ../generic/symv_k.c +QSYMV_U_KERNEL = ../generic/symv_k.c +QSYMV_L_KERNEL = ../generic/symv_k.c +CSYMV_U_KERNEL = ../generic/zsymv_k.c +CSYMV_L_KERNEL = ../generic/zsymv_k.c +ZSYMV_U_KERNEL = ../generic/zsymv_k.c +ZSYMV_L_KERNEL = ../generic/zsymv_k.c +XSYMV_U_KERNEL = ../generic/zsymv_k.c +XSYMV_L_KERNEL = ../generic/zsymv_k.c + +ZHEMV_U_KERNEL = ../generic/zhemv_k.c +ZHEMV_L_KERNEL = ../generic/zhemv_k.c + +CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c +ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c diff --git a/kernel/mips64/dnrm2.S b/kernel/mips64/dnrm2.S index 0ccc781e1..cd40414a2 100644 --- a/kernel/mips64/dnrm2.S +++ b/kernel/mips64/dnrm2.S @@ -90,7 +90,7 @@ //Init INF lui TEMP, 0x7FF0 dsll TEMP, TEMP, 32 - MTC1 TEMP, INF + MTC TEMP, INF LD a1, 0 * SIZE(X) daddiu N, N, -1 diff --git a/kernel/x86_64/dgemm_ncopy_8_skylakex.c b/kernel/x86_64/dgemm_ncopy_8_skylakex.c index 74b336f3d..874ef68d6 100644 --- a/kernel/x86_64/dgemm_ncopy_8_skylakex.c +++ b/kernel/x86_64/dgemm_ncopy_8_skylakex.c @@ -52,18 +52,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT * __restrict a, BLASLONG lda, FLOAT * __ FLOAT ctemp05, ctemp06, ctemp07, ctemp08; FLOAT ctemp09, ctemp10, ctemp11, ctemp12; FLOAT ctemp13, ctemp14, ctemp15, ctemp16; - FLOAT ctemp17, ctemp18, ctemp19, ctemp20; - FLOAT ctemp21, ctemp22, ctemp23, ctemp24; - FLOAT ctemp25, ctemp26, ctemp27, ctemp28; - FLOAT ctemp29, ctemp30, ctemp31, ctemp32; - FLOAT ctemp33, ctemp34, ctemp35, ctemp36; - FLOAT ctemp37, ctemp38, ctemp39, ctemp40; - FLOAT ctemp41, ctemp42, ctemp43, ctemp44; - FLOAT ctemp45, ctemp46, ctemp47, ctemp48; - FLOAT ctemp49, ctemp50, ctemp51, ctemp52; - FLOAT ctemp53, ctemp54, ctemp55, ctemp56; - FLOAT ctemp57, ctemp58, ctemp59, ctemp60; - FLOAT ctemp61, ctemp62, ctemp63, ctemp64; + FLOAT ctemp17 /*, ctemp18, ctemp19, ctemp20*/ ; + FLOAT /*ctemp21, ctemp22,*/ ctemp23, ctemp24; + FLOAT ctemp25 /*, ctemp26, ctemp27, ctemp28*/ ; + FLOAT /*ctemp29, ctemp30,*/ ctemp31, ctemp32; + FLOAT ctemp33 /*, ctemp34, ctemp35, ctemp36*/ ; + FLOAT /*ctemp37, ctemp38,*/ ctemp39, ctemp40; + FLOAT ctemp41 /*, ctemp42, ctemp43, ctemp44*/ ; + FLOAT /*ctemp45, ctemp46,*/ ctemp47, ctemp48; + FLOAT ctemp49 /*, ctemp50, ctemp51, ctemp52*/ ; + FLOAT /*ctemp53, ctemp54,*/ ctemp55, ctemp56; + FLOAT ctemp57 /*, ctemp58, ctemp59, ctemp60*/ ; + FLOAT /*ctemp61, ctemp62,*/ ctemp63, ctemp64; aoffset = a; diff --git a/kernel/x86_64/omatcopy_rt.c b/kernel/x86_64/omatcopy_rt.c index e695f00c5..b11893f5d 100644 --- a/kernel/x86_64/omatcopy_rt.c +++ b/kernel/x86_64/omatcopy_rt.c @@ -142,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ,"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15");\ } int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb){ - float *src, *dst, *dst_tmp, *src_base, *dst_base; + float *src, *dst, *dst_tmp=0, *src_base, *dst_base; uint64_t src_ld_bytes = (uint64_t)lda * sizeof(float), dst_ld_bytes = (uint64_t)ldb * sizeof(float), num_rows = 0; BLASLONG cols_left, rows_done; float ALPHA = alpha; if(ALPHA==0.0){ diff --git a/lapack-netlib/SRC/claed0.c b/lapack-netlib/SRC/claed0.c index 21e408397..2b696508e 100644 --- a/lapack-netlib/SRC/claed0.c +++ b/lapack-netlib/SRC/claed0.c @@ -796,10 +796,10 @@ L10: temp = log((real) (*n)) / log(2.f); lgn = (integer) temp; - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } iprmpt = indxq + *n + 1; diff --git a/lapack-netlib/SRC/claed7.c b/lapack-netlib/SRC/claed7.c index 49fc9ed4b..1eaa7e9c2 100644 --- a/lapack-netlib/SRC/claed7.c +++ b/lapack-netlib/SRC/claed7.c @@ -864,11 +864,11 @@ f"> */ /* Form the z-vector which consists of the last row of Q_1 and the */ /* first row of Q_2. */ - ptr = pow_ii(&c__2, tlvls) + 1; + ptr = pow_ii(c__2, *tlvls) + 1; i__1 = *curlvl - 1; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *tlvls - i__; - ptr += pow_ii(&c__2, &i__2); + ptr += pow_ii(c__2, i__2); /* L10: */ } curr = ptr + *curpbm; diff --git a/lapack-netlib/SRC/clalsa.c b/lapack-netlib/SRC/clalsa.c index 4bc3830a9..2ef3e1231 100644 --- a/lapack-netlib/SRC/clalsa.c +++ b/lapack-netlib/SRC/clalsa.c @@ -1051,7 +1051,7 @@ f"> */ /* Finally go through the left singular vector matrices of all */ /* the other subproblems bottom-up on the tree. */ - j = pow_ii(&c__2, &nlvl); + j = pow_ii(c__2, nlvl); sqre = 0; for (lvl = nlvl; lvl >= 1; --lvl) { @@ -1065,7 +1065,7 @@ f"> */ ll = 1; } else { i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); + lf = pow_ii(c__2, i__1); ll = (lf << 1) - 1; } i__1 = ll; @@ -1110,7 +1110,7 @@ L170: ll = 1; } else { i__2 = lvl - 1; - lf = pow_ii(&c__2, &i__2); + lf = pow_ii(c__2, i__2); ll = (lf << 1) - 1; } i__2 = lf; diff --git a/lapack-netlib/SRC/cstedc.c b/lapack-netlib/SRC/cstedc.c index 437c39e96..8f047d1ce 100644 --- a/lapack-netlib/SRC/cstedc.c +++ b/lapack-netlib/SRC/cstedc.c @@ -836,10 +836,10 @@ f"> */ lrwmin = *n - 1 << 1; } else if (icompz == 1) { lgn = (integer) (log((real) (*n)) / log(2.f)); - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } lwmin = *n * *n; diff --git a/lapack-netlib/SRC/dlaed0.c b/lapack-netlib/SRC/dlaed0.c index 95e39b0df..74e58dd2d 100644 --- a/lapack-netlib/SRC/dlaed0.c +++ b/lapack-netlib/SRC/dlaed0.c @@ -827,10 +827,10 @@ L10: temp = log((doublereal) (*n)) / log(2.); lgn = (integer) temp; - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } iprmpt = indxq + *n + 1; diff --git a/lapack-netlib/SRC/dlaed7.c b/lapack-netlib/SRC/dlaed7.c index fd8515261..d23a72be0 100644 --- a/lapack-netlib/SRC/dlaed7.c +++ b/lapack-netlib/SRC/dlaed7.c @@ -885,11 +885,11 @@ f"> */ /* Form the z-vector which consists of the last row of Q_1 and the */ /* first row of Q_2. */ - ptr = pow_ii(&c__2, tlvls) + 1; + ptr = pow_ii(c__2, *tlvls) + 1; i__1 = *curlvl - 1; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *tlvls - i__; - ptr += pow_ii(&c__2, &i__2); + ptr += pow_ii(c__2, i__2); /* L10: */ } curr = ptr + *curpbm; diff --git a/lapack-netlib/SRC/dlaeda.c b/lapack-netlib/SRC/dlaeda.c index f4bb214d3..202e1b636 100644 --- a/lapack-netlib/SRC/dlaeda.c +++ b/lapack-netlib/SRC/dlaeda.c @@ -754,7 +754,7 @@ f"> */ /* scheme */ i__1 = *curlvl - 1; - curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; + curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1; /* Determine size of these matrices. We add HALF to the value of */ /* the SQRT in case the machine underestimates one of these square */ @@ -781,12 +781,12 @@ f"> */ /* rotations and permutation and then multiplying the center matrices */ /* against the current Z. */ - ptr = pow_ii(&c__2, tlvls) + 1; + ptr = pow_ii(c__2, *tlvls) + 1; i__1 = *curlvl - 1; for (k = 1; k <= i__1; ++k) { i__2 = *curlvl - k; i__3 = *curlvl - k - 1; - curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - + curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) - 1; psiz1 = prmptr[curr + 1] - prmptr[curr]; psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; @@ -847,7 +847,7 @@ f"> */ c__1); i__2 = *tlvls - k; - ptr += pow_ii(&c__2, &i__2); + ptr += pow_ii(c__2, i__2); /* L70: */ } diff --git a/lapack-netlib/SRC/dlalsa.c b/lapack-netlib/SRC/dlalsa.c index 891ed66a8..4d5c347c3 100644 --- a/lapack-netlib/SRC/dlalsa.c +++ b/lapack-netlib/SRC/dlalsa.c @@ -951,7 +951,7 @@ f"> */ /* Finally go through the left singular vector matrices of all */ /* the other subproblems bottom-up on the tree. */ - j = pow_ii(&c__2, &nlvl); + j = pow_ii(c__2, nlvl); sqre = 0; for (lvl = nlvl; lvl >= 1; --lvl) { @@ -965,7 +965,7 @@ f"> */ ll = 1; } else { i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); + lf = pow_ii(c__2, i__1); ll = (lf << 1) - 1; } i__1 = ll; @@ -1010,7 +1010,7 @@ L50: ll = 1; } else { i__2 = lvl - 1; - lf = pow_ii(&c__2, &i__2); + lf = pow_ii(c__2, i__2); ll = (lf << 1) - 1; } i__2 = lf; diff --git a/lapack-netlib/SRC/dlasd0.c b/lapack-netlib/SRC/dlasd0.c index c702665b0..0f88527ef 100644 --- a/lapack-netlib/SRC/dlasd0.c +++ b/lapack-netlib/SRC/dlasd0.c @@ -824,7 +824,7 @@ f"> */ ll = 1; } else { i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); + lf = pow_ii(c__2, i__1); ll = (lf << 1) - 1; } i__1 = ll; diff --git a/lapack-netlib/SRC/dlasda.c b/lapack-netlib/SRC/dlasda.c index 72f9d55f3..a9190f805 100644 --- a/lapack-netlib/SRC/dlasda.c +++ b/lapack-netlib/SRC/dlasda.c @@ -1027,7 +1027,7 @@ f"> */ /* Now conquer each subproblem bottom-up. */ - j = pow_ii(&c__2, &nlvl); + j = pow_ii(c__2, nlvl); for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; @@ -1039,7 +1039,7 @@ f"> */ ll = 1; } else { i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); + lf = pow_ii(c__2, i__1); ll = (lf << 1) - 1; } i__1 = ll; diff --git a/lapack-netlib/SRC/dstedc.c b/lapack-netlib/SRC/dstedc.c index ef2eeabe8..56511d6cf 100644 --- a/lapack-netlib/SRC/dstedc.c +++ b/lapack-netlib/SRC/dstedc.c @@ -806,10 +806,10 @@ f"> */ lwmin = *n - 1 << 1; } else { lgn = (integer) (log((doublereal) (*n)) / log(2.)); - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } if (icompz == 1) { diff --git a/lapack-netlib/SRC/slaed0.c b/lapack-netlib/SRC/slaed0.c index 33f7134c1..4c5230907 100644 --- a/lapack-netlib/SRC/slaed0.c +++ b/lapack-netlib/SRC/slaed0.c @@ -823,10 +823,10 @@ L10: temp = log((real) (*n)) / log(2.f); lgn = (integer) temp; - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } iprmpt = indxq + *n + 1; diff --git a/lapack-netlib/SRC/slaed7.c b/lapack-netlib/SRC/slaed7.c index 210d796d1..22fcaf76d 100644 --- a/lapack-netlib/SRC/slaed7.c +++ b/lapack-netlib/SRC/slaed7.c @@ -883,11 +883,11 @@ f"> */ /* Form the z-vector which consists of the last row of Q_1 and the */ /* first row of Q_2. */ - ptr = pow_ii(&c__2, tlvls) + 1; + ptr = pow_ii(c__2, *tlvls) + 1; i__1 = *curlvl - 1; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *tlvls - i__; - ptr += pow_ii(&c__2, &i__2); + ptr += pow_ii(c__2, i__2); /* L10: */ } curr = ptr + *curpbm; diff --git a/lapack-netlib/SRC/slaeda.c b/lapack-netlib/SRC/slaeda.c index 7edaf8a76..3806427c2 100644 --- a/lapack-netlib/SRC/slaeda.c +++ b/lapack-netlib/SRC/slaeda.c @@ -753,7 +753,7 @@ f"> */ /* scheme */ i__1 = *curlvl - 1; - curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; + curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1; /* Determine size of these matrices. We add HALF to the value of */ /* the SQRT in case the machine underestimates one of these square */ @@ -779,12 +779,12 @@ f"> */ /* rotations and permutation and then multiplying the center matrices */ /* against the current Z. */ - ptr = pow_ii(&c__2, tlvls) + 1; + ptr = pow_ii(c__2, *tlvls) + 1; i__1 = *curlvl - 1; for (k = 1; k <= i__1; ++k) { i__2 = *curlvl - k; i__3 = *curlvl - k - 1; - curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - + curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) - 1; psiz1 = prmptr[curr + 1] - prmptr[curr]; psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; @@ -844,7 +844,7 @@ f"> */ c__1); i__2 = *tlvls - k; - ptr += pow_ii(&c__2, &i__2); + ptr += pow_ii(c__2, i__2); /* L70: */ } diff --git a/lapack-netlib/SRC/slalsa.c b/lapack-netlib/SRC/slalsa.c index 53da2c7bf..77a79b80c 100644 --- a/lapack-netlib/SRC/slalsa.c +++ b/lapack-netlib/SRC/slalsa.c @@ -946,7 +946,7 @@ f"> */ /* Finally go through the left singular vector matrices of all */ /* the other subproblems bottom-up on the tree. */ - j = pow_ii(&c__2, &nlvl); + j = pow_ii(c__2, nlvl); sqre = 0; for (lvl = nlvl; lvl >= 1; --lvl) { @@ -960,7 +960,7 @@ f"> */ ll = 1; } else { i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); + lf = pow_ii(c__2, i__1); ll = (lf << 1) - 1; } i__1 = ll; @@ -1005,7 +1005,7 @@ L50: ll = 1; } else { i__2 = lvl - 1; - lf = pow_ii(&c__2, &i__2); + lf = pow_ii(c__2, i__2); ll = (lf << 1) - 1; } i__2 = lf; diff --git a/lapack-netlib/SRC/slasd0.c b/lapack-netlib/SRC/slasd0.c index aa553579e..be1a74191 100644 --- a/lapack-netlib/SRC/slasd0.c +++ b/lapack-netlib/SRC/slasd0.c @@ -821,7 +821,7 @@ f"> */ ll = 1; } else { i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); + lf = pow_ii(c__2, i__1); ll = (lf << 1) - 1; } i__1 = ll; diff --git a/lapack-netlib/SRC/slasda.c b/lapack-netlib/SRC/slasda.c index 71424c3f1..1d336d1ce 100644 --- a/lapack-netlib/SRC/slasda.c +++ b/lapack-netlib/SRC/slasda.c @@ -1023,7 +1023,7 @@ f"> */ /* Now conquer each subproblem bottom-up. */ - j = pow_ii(&c__2, &nlvl); + j = pow_ii(c__2, nlvl); for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; @@ -1035,7 +1035,7 @@ f"> */ ll = 1; } else { i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); + lf = pow_ii(c__2, i__1); ll = (lf << 1) - 1; } i__1 = ll; diff --git a/lapack-netlib/SRC/sstedc.c b/lapack-netlib/SRC/sstedc.c index 46ed15a1a..61ad3dd37 100644 --- a/lapack-netlib/SRC/sstedc.c +++ b/lapack-netlib/SRC/sstedc.c @@ -804,10 +804,10 @@ f"> */ lwmin = *n - 1 << 1; } else { lgn = (integer) (log((real) (*n)) / log(2.f)); - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } if (icompz == 1) { diff --git a/lapack-netlib/SRC/zlaed0.c b/lapack-netlib/SRC/zlaed0.c index 37bd12b01..2b25f6e4e 100644 --- a/lapack-netlib/SRC/zlaed0.c +++ b/lapack-netlib/SRC/zlaed0.c @@ -793,10 +793,10 @@ L10: temp = log((doublereal) (*n)) / log(2.); lgn = (integer) temp; - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } iprmpt = indxq + *n + 1; diff --git a/lapack-netlib/SRC/zlaed7.c b/lapack-netlib/SRC/zlaed7.c index 093051917..8665ee12c 100644 --- a/lapack-netlib/SRC/zlaed7.c +++ b/lapack-netlib/SRC/zlaed7.c @@ -864,11 +864,11 @@ f"> */ /* Form the z-vector which consists of the last row of Q_1 and the */ /* first row of Q_2. */ - ptr = pow_ii(&c__2, tlvls) + 1; + ptr = pow_ii(c__2, *tlvls) + 1; i__1 = *curlvl - 1; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *tlvls - i__; - ptr += pow_ii(&c__2, &i__2); + ptr += pow_ii(c__2, i__2); /* L10: */ } curr = ptr + *curpbm; diff --git a/lapack-netlib/SRC/zlalsa.c b/lapack-netlib/SRC/zlalsa.c index d17016e7d..cd0819c3d 100644 --- a/lapack-netlib/SRC/zlalsa.c +++ b/lapack-netlib/SRC/zlalsa.c @@ -1051,7 +1051,7 @@ f"> */ /* Finally go through the left singular vector matrices of all */ /* the other subproblems bottom-up on the tree. */ - j = pow_ii(&c__2, &nlvl); + j = pow_ii(c__2, nlvl); sqre = 0; for (lvl = nlvl; lvl >= 1; --lvl) { @@ -1065,7 +1065,7 @@ f"> */ ll = 1; } else { i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); + lf = pow_ii(c__2, i__1); ll = (lf << 1) - 1; } i__1 = ll; @@ -1110,7 +1110,7 @@ L170: ll = 1; } else { i__2 = lvl - 1; - lf = pow_ii(&c__2, &i__2); + lf = pow_ii(c__2, i__2); ll = (lf << 1) - 1; } i__2 = lf; diff --git a/lapack-netlib/SRC/zstedc.c b/lapack-netlib/SRC/zstedc.c index 4cfc41840..55baba2d7 100644 --- a/lapack-netlib/SRC/zstedc.c +++ b/lapack-netlib/SRC/zstedc.c @@ -836,10 +836,10 @@ f"> */ lrwmin = *n - 1 << 1; } else if (icompz == 1) { lgn = (integer) (log((doublereal) (*n)) / log(2.)); - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } - if (pow_ii(&c__2, &lgn) < *n) { + if (pow_ii(c__2, lgn) < *n) { ++lgn; } lwmin = *n * *n;