Merge branch 'xianyi:develop' into travispytorch
This commit is contained in:
commit
db1c6a0b0f
|
@ -0,0 +1,114 @@
|
|||
# CI workflow: cross-compile OpenBLAS for several MIPS64 targets and run the
# resulting test programs under QEMU user-mode emulation (qemu-mips64el).
name: mips64 qemu test

on: [push, pull_request]

jobs:
  TEST:
    runs-on: ubuntu-latest
    strategy:
      # Keep building the remaining targets when one matrix entry fails.
      fail-fast: false
      matrix:
        # target: OpenBLAS TARGET name (also used in the ccache key)
        # triple: cross-toolchain prefix used for the gcc/gfortran packages
        # opts:   extra arguments passed to the OpenBLAS `make` invocation
        include:
          - target: MIPS64_GENERIC
            triple: mips64el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=MIPS64_GENERIC
          - target: SICORTEX
            triple: mips64el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=SICORTEX
          - target: I6400
            triple: mipsisa64r6el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=I6400
          - target: P6600
            triple: mipsisa64r6el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=P6600
          - target: I6500
            triple: mipsisa64r6el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=I6500

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # -y keeps the install non-interactive even when the runner image does
      # not pre-configure APT to assume "yes".
      - name: install build deps
        run: |
          sudo apt-get update
          sudo apt-get install -y autoconf automake autotools-dev ninja-build make ccache \
          gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-mips64el-cross

      # QEMU is built from a pinned upstream revision into ./qemu-install.
      - name: checkout qemu
        uses: actions/checkout@v3
        with:
          repository: qemu/qemu
          path: qemu
          ref: 79dfa177ae348bb5ab5f97c0915359b13d6186e2

      - name: build qemu
        run: |
          cd qemu
          ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=mips64el-linux-user --disable-system
          make -j$(nproc)
          make install

      # The exact key contains the commit SHA; restore-keys fall back to the
      # most recent cache for the same ref, then for the same target.
      - name: Compilation cache
        uses: actions/cache@v3
        with:
          path: ~/.ccache
          key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
          restore-keys: |
            ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
            ccache-${{ runner.os }}-${{ matrix.target }}

      - name: Configure ccache
        run: |
          test -d ~/.ccache || mkdir -p ~/.ccache
          echo "max_size = 300M" > ~/.ccache/ccache.conf
          echo "compression = true" >> ~/.ccache/ccache.conf
          ccache -s

      # Static linking so the test binaries can run under qemu user-mode
      # without a target sysroot.
      - name: build OpenBLAS
        run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)

      # Runs utest, the CBLAS tests (ctest) and the Fortran BLAS tests (test),
      # the latter both single-threaded and with two threads.
      - name: test
        run: |
          export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
          qemu-mips64el ./utest/openblas_utest
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat2 < ./ctest/sin2
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat2 < ./ctest/din2
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat2 < ./ctest/cin2
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat2 < ./ctest/zin2
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat3 < ./ctest/sin3
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat3 < ./ctest/din3
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat3 < ./ctest/cin3
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat3 < ./ctest/zin3
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat1
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat1
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat1
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat1
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat1
          rm -f ./test/?BLAT2.SUMM
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
          rm -f ./test/?BLAT2.SUMM
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
          rm -f ./test/?BLAT3.SUMM
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat
          rm -f ./test/?BLAT3.SUMM
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
          OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat
|
|
@ -197,14 +197,14 @@ if (DEFINED TARGET)
|
|||
if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
endif()
|
||||
# Append AVX2 compiler flags to KERNEL_DEFINITIONS when the kernels are built
# for the HASWELL or ZEN target, unless the user disabled them with NO_AVX2.
if ((${TARGET} STREQUAL HASWELL OR ${TARGET} STREQUAL ZEN) AND NOT NO_AVX2)
  if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
    # Only gcc 4.7 or newer is given -mavx2.
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
    if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
    endif()
  elseif (${CMAKE_C_COMPILER_ID} MATCHES "^(Apple)?Clang$")
    # CMake reports clang as "Clang" (or "AppleClang" for Apple's toolchain)
    # and STREQUAL is case-sensitive, so a comparison against "CLANG" never
    # matched and clang builds silently lost these flags.
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2 -mfma")
  endif()
endif()
|
||||
if (DEFINED HAVE_AVX)
|
||||
|
|
4
common.h
4
common.h
|
@ -387,6 +387,10 @@ typedef int blasint;
|
|||
#endif
|
||||
*/
|
||||
|
||||
#ifdef __EMSCRIPTEN__
|
||||
#define YIELDING
|
||||
#endif
|
||||
|
||||
#ifndef YIELDING
|
||||
#define YIELDING sched_yield()
|
||||
#endif
|
||||
|
|
5
ctest.c
5
ctest.c
|
@ -173,3 +173,8 @@ HAVE_C11
|
|||
ARCH_E2K
|
||||
#endif
|
||||
|
||||
#if defined(__EMSCRIPTEN__)
|
||||
ARCH_RISCV64
|
||||
OS_WINDOWS
|
||||
#endif
|
||||
|
||||
|
|
|
@ -969,7 +969,7 @@ real *sfac;
|
|||
1.17 };
|
||||
|
||||
/* Local variables */
|
||||
extern /* Subroutine */ srottest_();
|
||||
extern /* Subroutine */ void srottest_();
|
||||
static integer i__, k, ksize;
|
||||
extern /* Subroutine */ int stest_(), srotmtest_();
|
||||
static integer ki, kn;
|
||||
|
|
|
@ -69,6 +69,8 @@
|
|||
|
||||
int blas_server_avail = 0;
|
||||
|
||||
extern int openblas_omp_adaptive_env();
|
||||
|
||||
static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
|
||||
#ifdef HAVE_C11
|
||||
static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
|
||||
|
|
|
@ -23,7 +23,7 @@ ifeq ($(C_COMPILER), CLANG)
|
|||
# Any clang posing as gcc 4.2 should be new enough (3.4 or later)
|
||||
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2)
|
||||
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||
AVX2OPT = -mavx2
|
||||
AVX2OPT = -mavx2 -mfma
|
||||
endif
|
||||
endif
|
||||
ifdef NO_AVX2
|
||||
|
@ -73,6 +73,8 @@ else ifeq ($(TARGET_CORE), SKYLAKEX)
|
|||
endif
|
||||
else ifeq ($(TARGET_CORE), HASWELL)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
|
||||
else ifeq ($(TARGET_CORE), ZEN)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
|
||||
else ifeq ($(TARGET_CORE), LOONGSON3R4)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS)
|
||||
else
|
||||
|
|
|
@ -39,10 +39,19 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
FLOAT x0, x1, x2, x3, y0, y1, y2, y3;
|
||||
v4f32 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7;
|
||||
v4f32 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7;
|
||||
#if defined(DSDOT)
|
||||
v2f64 dvx0, dvx1, dvx2, dvx3, dvx4, dvx5, dvx6, dvx7;
|
||||
v2f64 dvy0, dvy1, dvy2, dvy3, dvy4, dvy5, dvy6, dvy7;
|
||||
v2f64 dot0 = {0, 0};
|
||||
v2f64 dot1 = {0, 0};
|
||||
v2f64 dot2 = {0, 0};
|
||||
v2f64 dot3 = {0, 0};
|
||||
#else
|
||||
v4f32 dot0 = {0, 0, 0, 0};
|
||||
v4f32 dot1 = {0, 0, 0, 0};
|
||||
v4f32 dot2 = {0, 0, 0, 0};
|
||||
v4f32 dot3 = {0, 0, 0, 0};
|
||||
#endif
|
||||
|
||||
if (n < 1) return (dot);
|
||||
|
||||
|
@ -83,6 +92,61 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
x_pref += 32;
|
||||
y_pref += 32;
|
||||
|
||||
#if defined(DSDOT)
|
||||
/* Extend single precision to double precision */
|
||||
dvy0 = __msa_fexupr_d(vy0);
|
||||
dvy1 = __msa_fexupr_d(vy1);
|
||||
dvy2 = __msa_fexupr_d(vy2);
|
||||
dvy3 = __msa_fexupr_d(vy3);
|
||||
dvy4 = __msa_fexupr_d(vy4);
|
||||
dvy5 = __msa_fexupr_d(vy5);
|
||||
dvy6 = __msa_fexupr_d(vy6);
|
||||
dvy7 = __msa_fexupr_d(vy7);
|
||||
|
||||
vy0 = (v4f32)__msa_fexupl_d(vy0);
|
||||
vy1 = (v4f32)__msa_fexupl_d(vy1);
|
||||
vy2 = (v4f32)__msa_fexupl_d(vy2);
|
||||
vy3 = (v4f32)__msa_fexupl_d(vy3);
|
||||
vy4 = (v4f32)__msa_fexupl_d(vy4);
|
||||
vy5 = (v4f32)__msa_fexupl_d(vy5);
|
||||
vy6 = (v4f32)__msa_fexupl_d(vy6);
|
||||
vy7 = (v4f32)__msa_fexupl_d(vy7);
|
||||
|
||||
dvx0 = __msa_fexupr_d(vx0);
|
||||
dvx1 = __msa_fexupr_d(vx1);
|
||||
dvx2 = __msa_fexupr_d(vx2);
|
||||
dvx3 = __msa_fexupr_d(vx3);
|
||||
dvx4 = __msa_fexupr_d(vx4);
|
||||
dvx5 = __msa_fexupr_d(vx5);
|
||||
dvx6 = __msa_fexupr_d(vx6);
|
||||
dvx7 = __msa_fexupr_d(vx7);
|
||||
|
||||
vx0 = (v4f32)__msa_fexupl_d(vx0);
|
||||
vx1 = (v4f32)__msa_fexupl_d(vx1);
|
||||
vx2 = (v4f32)__msa_fexupl_d(vx2);
|
||||
vx3 = (v4f32)__msa_fexupl_d(vx3);
|
||||
vx4 = (v4f32)__msa_fexupl_d(vx4);
|
||||
vx5 = (v4f32)__msa_fexupl_d(vx5);
|
||||
vx6 = (v4f32)__msa_fexupl_d(vx6);
|
||||
vx7 = (v4f32)__msa_fexupl_d(vx7);
|
||||
|
||||
dot0 += (dvy0 * dvx0);
|
||||
dot1 += (dvy1 * dvx1);
|
||||
dot2 += (dvy2 * dvx2);
|
||||
dot3 += (dvy3 * dvx3);
|
||||
dot0 += (dvy4 * dvx4);
|
||||
dot1 += (dvy5 * dvx5);
|
||||
dot2 += (dvy6 * dvx6);
|
||||
dot3 += (dvy7 * dvx7);
|
||||
dot0 += ((v2f64)vy0 * (v2f64)vx0);
|
||||
dot1 += ((v2f64)vy1 * (v2f64)vx1);
|
||||
dot2 += ((v2f64)vy2 * (v2f64)vx2);
|
||||
dot3 += ((v2f64)vy3 * (v2f64)vx3);
|
||||
dot0 += ((v2f64)vy4 * (v2f64)vx4);
|
||||
dot1 += ((v2f64)vy5 * (v2f64)vx5);
|
||||
dot2 += ((v2f64)vy6 * (v2f64)vx6);
|
||||
dot3 += ((v2f64)vy7 * (v2f64)vx7);
|
||||
#else
|
||||
dot0 += (vy0 * vx0);
|
||||
dot1 += (vy1 * vx1);
|
||||
dot2 += (vy2 * vx2);
|
||||
|
@ -91,6 +155,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
dot1 += (vy5 * vx5);
|
||||
dot2 += (vy6 * vx6);
|
||||
dot3 += (vy7 * vx7);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (n & 31)
|
||||
|
@ -100,10 +165,41 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
LD_SP4_INC(x, 4, vx0, vx1, vx2, vx3);
|
||||
LD_SP4_INC(y, 4, vy0, vy1, vy2, vy3);
|
||||
|
||||
#if defined(DSDOT)
|
||||
dvy0 = __msa_fexupr_d(vy0);
|
||||
dvy1 = __msa_fexupr_d(vy1);
|
||||
dvy2 = __msa_fexupr_d(vy2);
|
||||
dvy3 = __msa_fexupr_d(vy3);
|
||||
|
||||
vy0 = (v4f32)__msa_fexupl_d(vy0);
|
||||
vy1 = (v4f32)__msa_fexupl_d(vy1);
|
||||
vy2 = (v4f32)__msa_fexupl_d(vy2);
|
||||
vy3 = (v4f32)__msa_fexupl_d(vy3);
|
||||
|
||||
dvx0 = __msa_fexupr_d(vx0);
|
||||
dvx1 = __msa_fexupr_d(vx1);
|
||||
dvx2 = __msa_fexupr_d(vx2);
|
||||
dvx3 = __msa_fexupr_d(vx3);
|
||||
|
||||
vx0 = (v4f32)__msa_fexupl_d(vx0);
|
||||
vx1 = (v4f32)__msa_fexupl_d(vx1);
|
||||
vx2 = (v4f32)__msa_fexupl_d(vx2);
|
||||
vx3 = (v4f32)__msa_fexupl_d(vx3);
|
||||
|
||||
dot0 += (dvy0 * dvx0);
|
||||
dot1 += (dvy1 * dvx1);
|
||||
dot2 += (dvy2 * dvx2);
|
||||
dot3 += (dvy3 * dvx3);
|
||||
dot0 += ((v2f64)vy0 * (v2f64)vx0);
|
||||
dot1 += ((v2f64)vy1 * (v2f64)vx1);
|
||||
dot2 += ((v2f64)vy2 * (v2f64)vx2);
|
||||
dot3 += ((v2f64)vy3 * (v2f64)vx3);
|
||||
#else
|
||||
dot0 += (vy0 * vx0);
|
||||
dot1 += (vy1 * vx1);
|
||||
dot2 += (vy2 * vx2);
|
||||
dot3 += (vy3 * vx3);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (n & 8)
|
||||
|
@ -111,8 +207,27 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
LD_SP2_INC(x, 4, vx0, vx1);
|
||||
LD_SP2_INC(y, 4, vy0, vy1);
|
||||
|
||||
#if defined(DSDOT)
|
||||
dvy0 = __msa_fexupr_d(vy0);
|
||||
dvy1 = __msa_fexupr_d(vy1);
|
||||
|
||||
vy0 = (v4f32)__msa_fexupl_d(vy0);
|
||||
vy1 = (v4f32)__msa_fexupl_d(vy1);
|
||||
|
||||
dvx0 = __msa_fexupr_d(vx0);
|
||||
dvx1 = __msa_fexupr_d(vx1);
|
||||
|
||||
vx0 = (v4f32)__msa_fexupl_d(vx0);
|
||||
vx1 = (v4f32)__msa_fexupl_d(vx1);
|
||||
|
||||
dot0 += (dvy0 * dvx0);
|
||||
dot1 += (dvy1 * dvx1);
|
||||
dot0 += ((v2f64)vy0 * (v2f64)vx0);
|
||||
dot1 += ((v2f64)vy1 * (v2f64)vx1);
|
||||
#else
|
||||
dot0 += (vy0 * vx0);
|
||||
dot1 += (vy1 * vx1);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (n & 4)
|
||||
|
@ -120,7 +235,16 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
vx0 = LD_SP(x); x += 4;
|
||||
vy0 = LD_SP(y); y += 4;
|
||||
|
||||
#if defined(DSDOT)
|
||||
dvy0 = __msa_fexupr_d(vy0);
|
||||
vy0 = (v4f32)__msa_fexupl_d(vy0);
|
||||
dvx0 = __msa_fexupr_d(vx0);
|
||||
vx0 = (v4f32)__msa_fexupl_d(vx0);
|
||||
dot0 += (dvy0 * dvx0);
|
||||
dot0 += ((v2f64)vy0 * (v2f64)vx0);
|
||||
#else
|
||||
dot0 += (vy0 * vx0);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (n & 2)
|
||||
|
@ -128,8 +252,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
LD_GP2_INC(x, 1, x0, x1);
|
||||
LD_GP2_INC(y, 1, y0, y1);
|
||||
|
||||
#if defined(DSDOT)
|
||||
dot += ((double)y0 * (double)x0);
|
||||
dot += ((double)y1 * (double)x1);
|
||||
#else
|
||||
dot += (y0 * x0);
|
||||
dot += (y1 * x1);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (n & 1)
|
||||
|
@ -137,7 +266,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
x0 = *x;
|
||||
y0 = *y;
|
||||
|
||||
#if defined(DSDOT)
|
||||
dot += ((double)y0 * (double)x0);
|
||||
#else
|
||||
dot += (y0 * x0);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -145,8 +278,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
|
||||
dot += dot0[0];
|
||||
dot += dot0[1];
|
||||
#if !defined(DSDOT)
|
||||
dot += dot0[2];
|
||||
dot += dot0[3];
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -155,10 +290,17 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
LD_GP4_INC(x, inc_x, x0, x1, x2, x3);
|
||||
LD_GP4_INC(y, inc_y, y0, y1, y2, y3);
|
||||
|
||||
#if defined(DSDOT)
|
||||
dot += ((double)y0 * (double)x0);
|
||||
dot += ((double)y1 * (double)x1);
|
||||
dot += ((double)y2 * (double)x2);
|
||||
dot += ((double)y3 * (double)x3);
|
||||
#else
|
||||
dot += (y0 * x0);
|
||||
dot += (y1 * x1);
|
||||
dot += (y2 * x2);
|
||||
dot += (y3 * x3);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (n & 2)
|
||||
|
@ -166,8 +308,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
LD_GP2_INC(x, inc_x, x0, x1);
|
||||
LD_GP2_INC(y, inc_y, y0, y1);
|
||||
|
||||
#if defined(DSDOT)
|
||||
dot += ((double)y0 * (double)x0);
|
||||
dot += ((double)y1 * (double)x1);
|
||||
#else
|
||||
dot += (y0 * x0);
|
||||
dot += (y1 * x1);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (n & 1)
|
||||
|
@ -175,7 +322,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
x0 = *x;
|
||||
y0 = *y;
|
||||
|
||||
#if defined(DSDOT)
|
||||
dot += ((double)y0 * (double)x0);
|
||||
#else
|
||||
dot += (y0 * x0);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,160 @@
|
|||
SGEMM_BETA = ../generic/gemm_beta.c
|
||||
DGEMM_BETA = ../generic/gemm_beta.c
|
||||
CGEMM_BETA = ../generic/zgemm_beta.c
|
||||
ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||
|
||||
STRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
|
||||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
|
||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
|
||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
#Pure C for other kernels
|
||||
SAMAXKERNEL = ../mips/amax.c
|
||||
DAMAXKERNEL = ../mips/amax.c
|
||||
CAMAXKERNEL = ../mips/zamax.c
|
||||
ZAMAXKERNEL = ../mips/zamax.c
|
||||
|
||||
SAMINKERNEL = ../mips/amin.c
|
||||
DAMINKERNEL = ../mips/amin.c
|
||||
CAMINKERNEL = ../mips/zamin.c
|
||||
ZAMINKERNEL = ../mips/zamin.c
|
||||
|
||||
SMAXKERNEL = ../mips/max.c
|
||||
DMAXKERNEL = ../mips/max.c
|
||||
|
||||
SMINKERNEL = ../mips/min.c
|
||||
DMINKERNEL = ../mips/min.c
|
||||
|
||||
ISAMAXKERNEL = ../mips/iamax.c
|
||||
IDAMAXKERNEL = ../mips/iamax.c
|
||||
ICAMAXKERNEL = ../mips/izamax.c
|
||||
IZAMAXKERNEL = ../mips/izamax.c
|
||||
|
||||
ISAMINKERNEL = ../mips/iamin.c
|
||||
IDAMINKERNEL = ../mips/iamin.c
|
||||
ICAMINKERNEL = ../mips/izamin.c
|
||||
IZAMINKERNEL = ../mips/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../mips/imax.c
|
||||
IDMAXKERNEL = ../mips/imax.c
|
||||
|
||||
ISMINKERNEL = ../mips/imin.c
|
||||
IDMINKERNEL = ../mips/imin.c
|
||||
|
||||
SASUMKERNEL = ../mips/asum.c
|
||||
DASUMKERNEL = ../mips/asum.c
|
||||
CASUMKERNEL = ../mips/zasum.c
|
||||
ZASUMKERNEL = ../mips/zasum.c
|
||||
|
||||
SSUMKERNEL = ../mips/sum.c
|
||||
DSUMKERNEL = ../mips/sum.c
|
||||
CSUMKERNEL = ../mips/zsum.c
|
||||
ZSUMKERNEL = ../mips/zsum.c
|
||||
|
||||
SAXPYKERNEL = ../mips/axpy.c
|
||||
DAXPYKERNEL = ../mips/axpy.c
|
||||
CAXPYKERNEL = ../mips/zaxpy.c
|
||||
ZAXPYKERNEL = ../mips/zaxpy.c
|
||||
|
||||
SCOPYKERNEL = ../mips/copy.c
|
||||
DCOPYKERNEL = ../mips/copy.c
|
||||
CCOPYKERNEL = ../mips/zcopy.c
|
||||
ZCOPYKERNEL = ../mips/zcopy.c
|
||||
|
||||
SDOTKERNEL = ../mips/dot.c
|
||||
DDOTKERNEL = ../mips/dot.c
|
||||
CDOTKERNEL = ../mips/zdot.c
|
||||
ZDOTKERNEL = ../mips/zdot.c
|
||||
|
||||
SNRM2KERNEL = ../mips/nrm2.c
|
||||
DNRM2KERNEL = ../mips/nrm2.c
|
||||
CNRM2KERNEL = ../mips/znrm2.c
|
||||
ZNRM2KERNEL = ../mips/znrm2.c
|
||||
|
||||
SROTKERNEL = ../mips/rot.c
|
||||
DROTKERNEL = ../mips/rot.c
|
||||
CROTKERNEL = ../mips/zrot.c
|
||||
ZROTKERNEL = ../mips/zrot.c
|
||||
|
||||
SSCALKERNEL = ../mips/scal.c
|
||||
DSCALKERNEL = ../mips/scal.c
|
||||
CSCALKERNEL = ../mips/zscal.c
|
||||
ZSCALKERNEL = ../mips/zscal.c
|
||||
|
||||
SSWAPKERNEL = ../mips/swap.c
|
||||
DSWAPKERNEL = ../mips/swap.c
|
||||
CSWAPKERNEL = ../mips/zswap.c
|
||||
ZSWAPKERNEL = ../mips/zswap.c
|
||||
|
||||
SGEMVNKERNEL = ../mips/gemv_n.c
|
||||
DGEMVNKERNEL = ../mips/gemv_n.c
|
||||
CGEMVNKERNEL = ../mips/zgemv_n.c
|
||||
ZGEMVNKERNEL = ../mips/zgemv_n.c
|
||||
|
||||
SGEMVTKERNEL = ../mips/gemv_t.c
|
||||
DGEMVTKERNEL = ../mips/gemv_t.c
|
||||
CGEMVTKERNEL = ../mips/zgemv_t.c
|
||||
ZGEMVTKERNEL = ../mips/zgemv_t.c
|
||||
|
||||
SSYMV_U_KERNEL = ../generic/symv_k.c
|
||||
SSYMV_L_KERNEL = ../generic/symv_k.c
|
||||
DSYMV_U_KERNEL = ../generic/symv_k.c
|
||||
DSYMV_L_KERNEL = ../generic/symv_k.c
|
||||
QSYMV_U_KERNEL = ../generic/symv_k.c
|
||||
QSYMV_L_KERNEL = ../generic/symv_k.c
|
||||
CSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||
CSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||
ZSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||
ZSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||
XSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||
XSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||
|
||||
ZHEMV_U_KERNEL = ../generic/zhemv_k.c
|
||||
ZHEMV_L_KERNEL = ../generic/zhemv_k.c
|
||||
|
||||
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
||||
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
|
@ -90,7 +90,7 @@
|
|||
//Init INF
|
||||
lui TEMP, 0x7FF0
|
||||
dsll TEMP, TEMP, 32
|
||||
MTC1 TEMP, INF
|
||||
MTC TEMP, INF
|
||||
|
||||
LD a1, 0 * SIZE(X)
|
||||
daddiu N, N, -1
|
||||
|
|
|
@ -52,18 +52,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT * __restrict a, BLASLONG lda, FLOAT * __
|
|||
FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
|
||||
FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
|
||||
FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
|
||||
FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
|
||||
FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
|
||||
FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
|
||||
FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
|
||||
FLOAT ctemp33, ctemp34, ctemp35, ctemp36;
|
||||
FLOAT ctemp37, ctemp38, ctemp39, ctemp40;
|
||||
FLOAT ctemp41, ctemp42, ctemp43, ctemp44;
|
||||
FLOAT ctemp45, ctemp46, ctemp47, ctemp48;
|
||||
FLOAT ctemp49, ctemp50, ctemp51, ctemp52;
|
||||
FLOAT ctemp53, ctemp54, ctemp55, ctemp56;
|
||||
FLOAT ctemp57, ctemp58, ctemp59, ctemp60;
|
||||
FLOAT ctemp61, ctemp62, ctemp63, ctemp64;
|
||||
FLOAT ctemp17 /*, ctemp18, ctemp19, ctemp20*/ ;
|
||||
FLOAT /*ctemp21, ctemp22,*/ ctemp23, ctemp24;
|
||||
FLOAT ctemp25 /*, ctemp26, ctemp27, ctemp28*/ ;
|
||||
FLOAT /*ctemp29, ctemp30,*/ ctemp31, ctemp32;
|
||||
FLOAT ctemp33 /*, ctemp34, ctemp35, ctemp36*/ ;
|
||||
FLOAT /*ctemp37, ctemp38,*/ ctemp39, ctemp40;
|
||||
FLOAT ctemp41 /*, ctemp42, ctemp43, ctemp44*/ ;
|
||||
FLOAT /*ctemp45, ctemp46,*/ ctemp47, ctemp48;
|
||||
FLOAT ctemp49 /*, ctemp50, ctemp51, ctemp52*/ ;
|
||||
FLOAT /*ctemp53, ctemp54,*/ ctemp55, ctemp56;
|
||||
FLOAT ctemp57 /*, ctemp58, ctemp59, ctemp60*/ ;
|
||||
FLOAT /*ctemp61, ctemp62,*/ ctemp63, ctemp64;
|
||||
|
||||
|
||||
aoffset = a;
|
||||
|
|
|
@ -142,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
,"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15");\
|
||||
}
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb){
|
||||
float *src, *dst, *dst_tmp, *src_base, *dst_base;
|
||||
float *src, *dst, *dst_tmp=0, *src_base, *dst_base;
|
||||
uint64_t src_ld_bytes = (uint64_t)lda * sizeof(float), dst_ld_bytes = (uint64_t)ldb * sizeof(float), num_rows = 0;
|
||||
BLASLONG cols_left, rows_done; float ALPHA = alpha;
|
||||
if(ALPHA==0.0){
|
||||
|
|
|
@ -796,10 +796,10 @@ L10:
|
|||
|
||||
temp = log((real) (*n)) / log(2.f);
|
||||
lgn = (integer) temp;
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
iprmpt = indxq + *n + 1;
|
||||
|
|
|
@ -864,11 +864,11 @@ f"> */
|
|||
/* Form the z-vector which consists of the last row of Q_1 and the */
|
||||
/* first row of Q_2. */
|
||||
|
||||
ptr = pow_ii(&c__2, tlvls) + 1;
|
||||
ptr = pow_ii(c__2, *tlvls) + 1;
|
||||
i__1 = *curlvl - 1;
|
||||
for (i__ = 1; i__ <= i__1; ++i__) {
|
||||
i__2 = *tlvls - i__;
|
||||
ptr += pow_ii(&c__2, &i__2);
|
||||
ptr += pow_ii(c__2, i__2);
|
||||
/* L10: */
|
||||
}
|
||||
curr = ptr + *curpbm;
|
||||
|
|
|
@ -1051,7 +1051,7 @@ f"> */
|
|||
/* Finally go through the left singular vector matrices of all */
|
||||
/* the other subproblems bottom-up on the tree. */
|
||||
|
||||
j = pow_ii(&c__2, &nlvl);
|
||||
j = pow_ii(c__2, nlvl);
|
||||
sqre = 0;
|
||||
|
||||
for (lvl = nlvl; lvl >= 1; --lvl) {
|
||||
|
@ -1065,7 +1065,7 @@ f"> */
|
|||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
lf = pow_ii(&c__2, &i__1);
|
||||
lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
|
@ -1110,7 +1110,7 @@ L170:
|
|||
ll = 1;
|
||||
} else {
|
||||
i__2 = lvl - 1;
|
||||
lf = pow_ii(&c__2, &i__2);
|
||||
lf = pow_ii(c__2, i__2);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__2 = lf;
|
||||
|
|
|
@ -836,10 +836,10 @@ f"> */
|
|||
lrwmin = *n - 1 << 1;
|
||||
} else if (icompz == 1) {
|
||||
lgn = (integer) (log((real) (*n)) / log(2.f));
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
lwmin = *n * *n;
|
||||
|
|
|
@ -827,10 +827,10 @@ L10:
|
|||
|
||||
temp = log((doublereal) (*n)) / log(2.);
|
||||
lgn = (integer) temp;
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
iprmpt = indxq + *n + 1;
|
||||
|
|
|
@ -885,11 +885,11 @@ f"> */
|
|||
/* Form the z-vector which consists of the last row of Q_1 and the */
|
||||
/* first row of Q_2. */
|
||||
|
||||
ptr = pow_ii(&c__2, tlvls) + 1;
|
||||
ptr = pow_ii(c__2, *tlvls) + 1;
|
||||
i__1 = *curlvl - 1;
|
||||
for (i__ = 1; i__ <= i__1; ++i__) {
|
||||
i__2 = *tlvls - i__;
|
||||
ptr += pow_ii(&c__2, &i__2);
|
||||
ptr += pow_ii(c__2, i__2);
|
||||
/* L10: */
|
||||
}
|
||||
curr = ptr + *curpbm;
|
||||
|
|
|
@ -754,7 +754,7 @@ f"> */
|
|||
/* scheme */
|
||||
|
||||
i__1 = *curlvl - 1;
|
||||
curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
|
||||
curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1;
|
||||
|
||||
/* Determine size of these matrices. We add HALF to the value of */
|
||||
/* the SQRT in case the machine underestimates one of these square */
|
||||
|
@ -781,12 +781,12 @@ f"> */
|
|||
/* rotations and permutation and then multiplying the center matrices */
|
||||
/* against the current Z. */
|
||||
|
||||
ptr = pow_ii(&c__2, tlvls) + 1;
|
||||
ptr = pow_ii(c__2, *tlvls) + 1;
|
||||
i__1 = *curlvl - 1;
|
||||
for (k = 1; k <= i__1; ++k) {
|
||||
i__2 = *curlvl - k;
|
||||
i__3 = *curlvl - k - 1;
|
||||
curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
|
||||
curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) -
|
||||
1;
|
||||
psiz1 = prmptr[curr + 1] - prmptr[curr];
|
||||
psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
|
||||
|
@ -847,7 +847,7 @@ f"> */
|
|||
c__1);
|
||||
|
||||
i__2 = *tlvls - k;
|
||||
ptr += pow_ii(&c__2, &i__2);
|
||||
ptr += pow_ii(c__2, i__2);
|
||||
/* L70: */
|
||||
}
|
||||
|
||||
|
|
|
@ -951,7 +951,7 @@ f"> */
|
|||
/* Finally go through the left singular vector matrices of all */
|
||||
/* the other subproblems bottom-up on the tree. */
|
||||
|
||||
j = pow_ii(&c__2, &nlvl);
|
||||
j = pow_ii(c__2, nlvl);
|
||||
sqre = 0;
|
||||
|
||||
for (lvl = nlvl; lvl >= 1; --lvl) {
|
||||
|
@ -965,7 +965,7 @@ f"> */
|
|||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
lf = pow_ii(&c__2, &i__1);
|
||||
lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
|
@ -1010,7 +1010,7 @@ L50:
|
|||
ll = 1;
|
||||
} else {
|
||||
i__2 = lvl - 1;
|
||||
lf = pow_ii(&c__2, &i__2);
|
||||
lf = pow_ii(c__2, i__2);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__2 = lf;
|
||||
|
|
|
@ -824,7 +824,7 @@ f"> */
|
|||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
lf = pow_ii(&c__2, &i__1);
|
||||
lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
|
|
|
@ -1027,7 +1027,7 @@ f"> */
|
|||
|
||||
/* Now conquer each subproblem bottom-up. */
|
||||
|
||||
j = pow_ii(&c__2, &nlvl);
|
||||
j = pow_ii(c__2, nlvl);
|
||||
for (lvl = nlvl; lvl >= 1; --lvl) {
|
||||
lvl2 = (lvl << 1) - 1;
|
||||
|
||||
|
@ -1039,7 +1039,7 @@ f"> */
|
|||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
lf = pow_ii(&c__2, &i__1);
|
||||
lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
|
|
|
@ -806,10 +806,10 @@ f"> */
|
|||
lwmin = *n - 1 << 1;
|
||||
} else {
|
||||
lgn = (integer) (log((doublereal) (*n)) / log(2.));
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
if (icompz == 1) {
|
||||
|
|
|
@ -823,10 +823,10 @@ L10:
|
|||
|
||||
temp = log((real) (*n)) / log(2.f);
|
||||
lgn = (integer) temp;
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
iprmpt = indxq + *n + 1;
|
||||
|
|
|
@ -883,11 +883,11 @@ f"> */
|
|||
/* Form the z-vector which consists of the last row of Q_1 and the */
|
||||
/* first row of Q_2. */
|
||||
|
||||
ptr = pow_ii(&c__2, tlvls) + 1;
|
||||
ptr = pow_ii(c__2, *tlvls) + 1;
|
||||
i__1 = *curlvl - 1;
|
||||
for (i__ = 1; i__ <= i__1; ++i__) {
|
||||
i__2 = *tlvls - i__;
|
||||
ptr += pow_ii(&c__2, &i__2);
|
||||
ptr += pow_ii(c__2, i__2);
|
||||
/* L10: */
|
||||
}
|
||||
curr = ptr + *curpbm;
|
||||
|
|
|
@ -753,7 +753,7 @@ f"> */
|
|||
/* scheme */
|
||||
|
||||
i__1 = *curlvl - 1;
|
||||
curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
|
||||
curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1;
|
||||
|
||||
/* Determine size of these matrices. We add HALF to the value of */
|
||||
/* the SQRT in case the machine underestimates one of these square */
|
||||
|
@ -779,12 +779,12 @@ f"> */
|
|||
/* rotations and permutation and then multiplying the center matrices */
|
||||
/* against the current Z. */
|
||||
|
||||
ptr = pow_ii(&c__2, tlvls) + 1;
|
||||
ptr = pow_ii(c__2, *tlvls) + 1;
|
||||
i__1 = *curlvl - 1;
|
||||
for (k = 1; k <= i__1; ++k) {
|
||||
i__2 = *curlvl - k;
|
||||
i__3 = *curlvl - k - 1;
|
||||
curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
|
||||
curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) -
|
||||
1;
|
||||
psiz1 = prmptr[curr + 1] - prmptr[curr];
|
||||
psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
|
||||
|
@ -844,7 +844,7 @@ f"> */
|
|||
c__1);
|
||||
|
||||
i__2 = *tlvls - k;
|
||||
ptr += pow_ii(&c__2, &i__2);
|
||||
ptr += pow_ii(c__2, i__2);
|
||||
/* L70: */
|
||||
}
|
||||
|
||||
|
|
|
@ -946,7 +946,7 @@ f"> */
|
|||
/* Finally go through the left singular vector matrices of all */
|
||||
/* the other subproblems bottom-up on the tree. */
|
||||
|
||||
j = pow_ii(&c__2, &nlvl);
|
||||
j = pow_ii(c__2, nlvl);
|
||||
sqre = 0;
|
||||
|
||||
for (lvl = nlvl; lvl >= 1; --lvl) {
|
||||
|
@ -960,7 +960,7 @@ f"> */
|
|||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
lf = pow_ii(&c__2, &i__1);
|
||||
lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
|
@ -1005,7 +1005,7 @@ L50:
|
|||
ll = 1;
|
||||
} else {
|
||||
i__2 = lvl - 1;
|
||||
lf = pow_ii(&c__2, &i__2);
|
||||
lf = pow_ii(c__2, i__2);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__2 = lf;
|
||||
|
|
|
@ -821,7 +821,7 @@ f"> */
|
|||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
lf = pow_ii(&c__2, &i__1);
|
||||
lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
|
|
|
@ -1023,7 +1023,7 @@ f"> */
|
|||
|
||||
/* Now conquer each subproblem bottom-up. */
|
||||
|
||||
j = pow_ii(&c__2, &nlvl);
|
||||
j = pow_ii(c__2, nlvl);
|
||||
for (lvl = nlvl; lvl >= 1; --lvl) {
|
||||
lvl2 = (lvl << 1) - 1;
|
||||
|
||||
|
@ -1035,7 +1035,7 @@ f"> */
|
|||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
lf = pow_ii(&c__2, &i__1);
|
||||
lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
|
|
|
@ -804,10 +804,10 @@ f"> */
|
|||
lwmin = *n - 1 << 1;
|
||||
} else {
|
||||
lgn = (integer) (log((real) (*n)) / log(2.f));
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
if (icompz == 1) {
|
||||
|
|
|
@ -793,10 +793,10 @@ L10:
|
|||
|
||||
temp = log((doublereal) (*n)) / log(2.);
|
||||
lgn = (integer) temp;
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
iprmpt = indxq + *n + 1;
|
||||
|
|
|
@ -864,11 +864,11 @@ f"> */
|
|||
/* Form the z-vector which consists of the last row of Q_1 and the */
|
||||
/* first row of Q_2. */
|
||||
|
||||
ptr = pow_ii(&c__2, tlvls) + 1;
|
||||
ptr = pow_ii(c__2, *tlvls) + 1;
|
||||
i__1 = *curlvl - 1;
|
||||
for (i__ = 1; i__ <= i__1; ++i__) {
|
||||
i__2 = *tlvls - i__;
|
||||
ptr += pow_ii(&c__2, &i__2);
|
||||
ptr += pow_ii(c__2, i__2);
|
||||
/* L10: */
|
||||
}
|
||||
curr = ptr + *curpbm;
|
||||
|
|
|
@ -1051,7 +1051,7 @@ f"> */
|
|||
/* Finally go through the left singular vector matrices of all */
|
||||
/* the other subproblems bottom-up on the tree. */
|
||||
|
||||
j = pow_ii(&c__2, &nlvl);
|
||||
j = pow_ii(c__2, nlvl);
|
||||
sqre = 0;
|
||||
|
||||
for (lvl = nlvl; lvl >= 1; --lvl) {
|
||||
|
@ -1065,7 +1065,7 @@ f"> */
|
|||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
lf = pow_ii(&c__2, &i__1);
|
||||
lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
|
@ -1110,7 +1110,7 @@ L170:
|
|||
ll = 1;
|
||||
} else {
|
||||
i__2 = lvl - 1;
|
||||
lf = pow_ii(&c__2, &i__2);
|
||||
lf = pow_ii(c__2, i__2);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__2 = lf;
|
||||
|
|
|
@ -836,10 +836,10 @@ f"> */
|
|||
lrwmin = *n - 1 << 1;
|
||||
} else if (icompz == 1) {
|
||||
lgn = (integer) (log((doublereal) (*n)) / log(2.));
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
if (pow_ii(&c__2, &lgn) < *n) {
|
||||
if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
lwmin = *n * *n;
|
||||
|
|
Loading…
Reference in New Issue