Merge pull request #23 from xianyi/develop

rebase
Martin Kroeker 2021-04-19 22:24:12 +02:00 committed by GitHub
commit fc101b67e5
13 changed files with 1295 additions and 44 deletions

View File

@@ -1,10 +1,21 @@
 # COMPILER_PREFIX = mingw32-
-ifdef HAVE_SSE
-CCOMMON_OPT += -msse
-FCOMMON_OPT += -msse
+ifndef DYNAMIC_ARCH
+ADD_CPUFLAGS = 1
+else
+ifdef TARGET_CORE
+ADD_CPUFLAGS = 1
+endif
 endif
+ifdef ADD_CPUFLAGS
+ifdef HAVE_SSE
+CCOMMON_OPT += -msse
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -msse
+endif
+endif
+endif
 ifeq ($(OSNAME), Interix)
 ARFLAGS = -m x86
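Note: the new ADD_CPUFLAGS gate (repeated below for the x86-64 Makefile) only passes raw ISA flags such as -msse when the build targets a fixed core, or when a DYNAMIC_ARCH build is compiling the kernels of one specific TARGET_CORE. A hedged restatement of that Make logic as plain C, with illustrative names:

/* Illustrative sketch only: add_cpuflags() mirrors the Makefile's
 * ADD_CPUFLAGS conditionals; the real logic lives in Make, not C. */
static int add_cpuflags(int dynamic_arch, int target_core_set)
{
    if (!dynamic_arch)
        return 1;               /* fixed-target build: always add -msse etc. */
    return target_core_set;     /* DYNAMIC_ARCH: only per-TARGET_CORE kernel builds */
}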

View File

@@ -8,6 +8,16 @@ endif
 endif
 endif
+ifndef DYNAMIC_ARCH
+ADD_CPUFLAGS = 1
+else
+ifdef TARGET_CORE
+ADD_CPUFLAGS = 1
+endif
+endif
+ifdef ADD_CPUFLAGS
 ifdef HAVE_SSE3
 CCOMMON_OPT += -msse3
 ifneq ($(F_COMPILER), NAG)
@@ -44,7 +54,6 @@ endif
 endif
 ifeq ($(CORE), SKYLAKEX)
-ifndef DYNAMIC_ARCH
 ifndef NO_AVX512
 CCOMMON_OPT += -march=skylake-avx512
 ifneq ($(F_COMPILER), NAG)
@@ -62,10 +71,8 @@ endif
 endif
 endif
 endif
-endif
 ifeq ($(CORE), COOPERLAKE)
-ifndef DYNAMIC_ARCH
 ifndef NO_AVX512
 ifeq ($(C_COMPILER), GCC)
 # cooperlake support was added in 10.1
@@ -88,7 +95,6 @@ endif
 endif
 endif
 endif
-endif
 ifdef HAVE_AVX2
 ifndef NO_AVX2
@@ -120,6 +126,7 @@ endif
 endif
 endif
+endif
 ifeq ($(OSNAME), Interix)

View File

@@ -299,6 +299,10 @@ if (NO_AVX2)
 set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
 endif ()
+if (NO_AVX512)
+set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512")
+endif ()
 if (USE_THREAD)
 # USE_SIMPLE_THREADED_LEVEL3 = 1
 # NO_AFFINITY = 1

View File

@@ -126,7 +126,7 @@ extern void openblas_warning(int verbose, const char * msg);
 #endif
 #define get_cpu_ftr(id, var) ({ \
-__asm__ ("mrs %0, "#id : "=r" (var)); \
+__asm__ __volatile__ ("mrs %0, "#id : "=r" (var)); \
 })
 static char *corename[] = {
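The only change here is adding __volatile__ to the mrs read. Since the asm has no inputs, a non-volatile asm with the same output could legally be hoisted or merged by the compiler; __volatile__ forces one real register read per expansion. A self-contained AArch64 sketch of the same macro pattern (the register name is just an example, not the library's detection code):

#include <stdint.h>

/* Sketch of the pattern: read an AArch64 system register into a C variable. */
#define get_cpu_ftr(id, var) ({ \
    __asm__ __volatile__ ("mrs %0, " #id : "=r" (var)); \
})

uint64_t read_midr(void)
{
    uint64_t midr;
    get_cpu_ftr(MIDR_EL1, midr);   /* MIDR_EL1: CPU identification register */
    return midr;
}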

View File

@@ -186,7 +186,7 @@ ZSWAPKERNEL = zswap.c
 SGEMVNKERNEL = sgemv_n.c
 DGEMVNKERNEL = dgemv_n_power10.c
 CGEMVNKERNEL = cgemv_n.c
-ZGEMVNKERNEL = zgemv_n_4.c
+ZGEMVNKERNEL = zgemv_n_power10.c
 #
 SGEMVTKERNEL = sgemv_t.c
 DGEMVTKERNEL = dgemv_t_power10.c

View File

@@ -190,10 +190,9 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
 __vector_quad acc0, acc1, acc2, acc3, acc4,acc5,acc6,acc7;
 BLASLONG l = 0;
 vec_t *rowA = (vec_t *) & AO[0];
-vec_t *rb = (vec_t *) & BO[0];
 __vector_pair rowB, rowB1;
-__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
-__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
+rowB = *((__vector_pair *)((void *)&BO[0]));
+rowB1 = *((__vector_pair *)((void *)&BO[4]));
 __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
 __builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
 __builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
@@ -205,9 +204,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
 for (l = 1; l < temp; l++)
 {
 rowA = (vec_t *) & AO[l << 3];
-rb = (vec_t *) & BO[l << 3];
-__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
-__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
+rowB = *((__vector_pair *)((void *)&BO[l << 3]));
+rowB1 = *((__vector_pair *)((void *)&BO[(l << 3) + 4]));
 __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
 __builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
 __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
@@ -247,9 +245,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
 BLASLONG l = 0;
 vec_t *rowA = (vec_t *) & AO[0];
 __vector_pair rowB, rowB1;
-vec_t *rb = (vec_t *) & BO[0];
-__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
-__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
+rowB = *((__vector_pair *)((void *)&BO[0]));
+rowB1 = *((__vector_pair *)((void *)&BO[4]));
 __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
 __builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
 __builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
@@ -257,9 +254,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
 for (l = 1; l < temp; l++)
 {
 rowA = (vec_t *) & AO[l << 2];
-rb = (vec_t *) & BO[l << 3];
-__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
-__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
+rowB = *((__vector_pair *)((void *)&BO[l << 3]));
+rowB1 = *((__vector_pair *)((void *)&BO[(l << 3) + 4]));
 __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
 __builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
 __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
@@ -291,17 +287,15 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
 BLASLONG l = 0;
 vec_t *rowA = (vec_t *) & AO[0];
 __vector_pair rowB, rowB1;
-vec_t *rb = (vec_t *) & BO[0];
-__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
-__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
+rowB = *((__vector_pair *)((void *)&BO[0]));
+rowB1 = *((__vector_pair *)((void *)&BO[4]));
 __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
 __builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
 for (l = 1; l < temp; l++)
 {
 rowA = (vec_t *) & AO[l << 1];
-rb = (vec_t *) & BO[l << 3];
-__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
-__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
+rowB = *((__vector_pair *)((void *)&BO[l << 3]));
+rowB1 = *((__vector_pair *)((void *)&BO[(l << 3) + 4]));
 __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
 __builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
 }
@@ -403,8 +397,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
 BLASLONG l = 0;
 vec_t *rowA = (vec_t *) & AO[0];
 __vector_pair rowB;
-vec_t *rb = (vec_t *) & BO[0];
-__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
+rowB = *((__vector_pair *)((void *)&BO[0]));
 __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
 __builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
 __builtin_mma_xvf64ger (&acc2, rowB, rowA[2]);
@@ -412,8 +405,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
 for (l = 1; l < temp; l++)
 {
 rowA = (vec_t *) & AO[l << 3];
-rb = (vec_t *) & BO[l << 2];
-__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
+rowB = *((__vector_pair *)((void *)&BO[l << 2]));
 __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
 __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
 __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
@@ -445,15 +437,13 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
 BLASLONG l = 0;
 vec_t *rowA = (vec_t *) & AO[0];
 __vector_pair rowB;
-vec_t *rb = (vec_t *) & BO[0];
-__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
+rowB = *((__vector_pair *)((void *)&BO[0]));
 __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
 __builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
 for (l = 1; l < temp; l++)
 {
 rowA = (vec_t *) & AO[l << 2];
-rb = (vec_t *) & BO[l << 2];
-__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
+rowB = *((__vector_pair *)((void *)&BO[l << 2]));
 __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
 __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
 }
@@ -481,14 +471,12 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
 BLASLONG l = 0;
 vec_t *rowA = (vec_t *) & AO[0];
 __vector_pair rowB;
-vec_t *rb = (vec_t *) & BO[0];
-__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
+rowB = *((__vector_pair *)((void *)&BO[0]));
 __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
 for (l = 1; l < temp; l++)
 {
 rowA = (vec_t *) & AO[l << 1];
-rb = (vec_t *) & BO[l << 2];
-__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
+rowB = *((__vector_pair *)((void *)&BO[l << 2]));
 __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
 }
 SAVE_ACC (&acc0, 0);
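Every hunk in this kernel makes the same substitution: rather than loading B as two vec_t halves and gluing them together with __builtin_vsx_assemble_pair, the new code reinterprets the B pointer as a __vector_pair and loads 32 bytes at once, which GCC can lower to a single lxvp paired load. A minimal sketch of the two styles, assuming a POWER10-capable compiler (e.g. gcc -mcpu=power10); this is an illustration, not the kernel itself:

typedef __vector unsigned char vec_t;

/* One 4x2 FP64 rank-1 update into an MMA accumulator, both B-load styles. */
void outer_product_step(double *AO, double *BO)
{
    __vector_quad acc;
    vec_t *rowA = (vec_t *) &AO[0];

    /* old style: two 16-byte loads, then assemble a register pair */
    vec_t *rb = (vec_t *) &BO[0];
    __vector_pair rowB_old;
    __builtin_vsx_assemble_pair (&rowB_old, rb[1], rb[0]);

    /* new style: one 32-byte paired load (lxvp) straight into a __vector_pair */
    __vector_pair rowB_new = *((__vector_pair *)((void *)&BO[0]));

    __builtin_mma_xvf64ger (&acc, rowB_new, rowA[0]);   /* rank-1 update */
    (void) rowB_old;
}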

File diff suppressed because it is too large

View File

@@ -43,6 +43,134 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #elif HAVE_KERNEL_4x4_VEC
+#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
+typedef __vector unsigned char vec_t;
+typedef FLOAT v4sf_t __attribute__ ((vector_size (16)));
+static void zgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) {
+BLASLONG i;
+FLOAT *a0, *a1, *a2, *a3;
+a0 = ap;
+a1 = ap + lda;
+a2 = a1 + lda;
+a3 = a2 + lda;
+__vector_quad acc0, acc1, acc2, acc3;
+__vector_quad acc4, acc5, acc6, acc7;
+v4sf_t result[4];
+__vector_pair *Va0, *Va1, *Va2, *Va3;
+i = 0;
+n = n << 1;
+__builtin_mma_xxsetaccz (&acc0);
+__builtin_mma_xxsetaccz (&acc1);
+__builtin_mma_xxsetaccz (&acc2);
+__builtin_mma_xxsetaccz (&acc3);
+__builtin_mma_xxsetaccz (&acc4);
+__builtin_mma_xxsetaccz (&acc5);
+__builtin_mma_xxsetaccz (&acc6);
+__builtin_mma_xxsetaccz (&acc7);
+while (i < n) {
+vec_t *rx = (vec_t *) & x[i];
+Va0 = ((__vector_pair*)((void*)&a0[i]));
+Va1 = ((__vector_pair*)((void*)&a1[i]));
+Va2 = ((__vector_pair*)((void*)&a2[i]));
+Va3 = ((__vector_pair*)((void*)&a3[i]));
+__builtin_mma_xvf64gerpp (&acc0, Va0[0], rx[0]);
+__builtin_mma_xvf64gerpp (&acc1, Va1[0], rx[0]);
+__builtin_mma_xvf64gerpp (&acc2, Va2[0], rx[0]);
+__builtin_mma_xvf64gerpp (&acc3, Va3[0], rx[0]);
+__builtin_mma_xvf64gerpp (&acc4, Va0[0], rx[1]);
+__builtin_mma_xvf64gerpp (&acc5, Va1[0], rx[1]);
+__builtin_mma_xvf64gerpp (&acc6, Va2[0], rx[1]);
+__builtin_mma_xvf64gerpp (&acc7, Va3[0], rx[1]);
+__builtin_mma_xvf64gerpp (&acc0, Va0[1], rx[2]);
+__builtin_mma_xvf64gerpp (&acc1, Va1[1], rx[2]);
+__builtin_mma_xvf64gerpp (&acc2, Va2[1], rx[2]);
+__builtin_mma_xvf64gerpp (&acc3, Va3[1], rx[2]);
+__builtin_mma_xvf64gerpp (&acc4, Va0[1], rx[3]);
+__builtin_mma_xvf64gerpp (&acc5, Va1[1], rx[3]);
+__builtin_mma_xvf64gerpp (&acc6, Va2[1], rx[3]);
+__builtin_mma_xvf64gerpp (&acc7, Va3[1], rx[3]);
+i += 8;
+}
+#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
+__builtin_mma_disassemble_acc ((void *)result, &acc0);
+register FLOAT temp_r0 = result[0][0] - result[1][1];
+register FLOAT temp_i0 = result[0][1] + result[1][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc4);
+temp_r0 += result[2][0] - result[3][1];
+temp_i0 += result[2][1] + result[3][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc1);
+register FLOAT temp_r1 = result[0][0] - result[1][1];
+register FLOAT temp_i1 = result[0][1] + result[1][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc5);
+temp_r1 += result[2][0] - result[3][1];
+temp_i1 += result[2][1] + result[3][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc2);
+register FLOAT temp_r2 = result[0][0] - result[1][1];
+register FLOAT temp_i2 = result[0][1] + result[1][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc6);
+temp_r2 += result[2][0] - result[3][1];
+temp_i2 += result[2][1] + result[3][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc3);
+register FLOAT temp_r3 = result[0][0] - result[1][1];
+register FLOAT temp_i3 = result[0][1] + result[1][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc7);
+temp_r3 += result[2][0] - result[3][1];
+temp_i3 += result[2][1] + result[3][0];
+#else
+__builtin_mma_disassemble_acc ((void *)result, &acc0);
+register FLOAT temp_r0 = result[0][0] + result[1][1];
+register FLOAT temp_i0 = result[0][1] - result[1][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc4);
+temp_r0 += result[2][0] + result[3][1];
+temp_i0 += result[2][1] - result[3][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc1);
+register FLOAT temp_r1 = result[0][0] + result[1][1];
+register FLOAT temp_i1 = result[0][1] - result[1][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc5);
+temp_r1 += result[2][0] + result[3][1];
+temp_i1 += result[2][1] - result[3][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc2);
+register FLOAT temp_r2 = result[0][0] + result[1][1];
+register FLOAT temp_i2 = result[0][1] - result[1][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc6);
+temp_r2 += result[2][0] + result[3][1];
+temp_i2 += result[2][1] - result[3][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc3);
+register FLOAT temp_r3 = result[0][0] + result[1][1];
+register FLOAT temp_i3 = result[0][1] - result[1][0];
+__builtin_mma_disassemble_acc ((void *)result, &acc7);
+temp_r3 += result[2][0] + result[3][1];
+temp_i3 += result[2][1] - result[3][0];
+#endif
+#if !defined(XCONJ)
+y[0] += alpha_r * temp_r0 - alpha_i * temp_i0;
+y[1] += alpha_r * temp_i0 + alpha_i * temp_r0;
+y[2] += alpha_r * temp_r1 - alpha_i * temp_i1;
+y[3] += alpha_r * temp_i1 + alpha_i * temp_r1;
+y[4] += alpha_r * temp_r2 - alpha_i * temp_i2;
+y[5] += alpha_r * temp_i2 + alpha_i * temp_r2;
+y[6] += alpha_r * temp_r3 - alpha_i * temp_i3;
+y[7] += alpha_r * temp_i3 + alpha_i * temp_r3;
+#else
+y[0] += alpha_r * temp_r0 + alpha_i * temp_i0;
+y[1] -= alpha_r * temp_i0 - alpha_i * temp_r0;
+y[2] += alpha_r * temp_r1 + alpha_i * temp_i1;
+y[3] -= alpha_r * temp_i1 - alpha_i * temp_r1;
+y[4] += alpha_r * temp_r2 + alpha_i * temp_i2;
+y[5] -= alpha_r * temp_i2 - alpha_i * temp_r2;
+y[6] += alpha_r * temp_r3 + alpha_i * temp_i3;
+y[7] -= alpha_r * temp_i3 - alpha_i * temp_r3;
+#endif
+}
+#else
 static void zgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) {
 BLASLONG i;
 FLOAT *a0, *a1, *a2, *a3;
@@ -198,6 +326,7 @@ static void zgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
 #endif
 }
+#endif
 #else
 static void zgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) {
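The new POWER10 path accumulates the four partial products of each complex multiply in MMA accumulators and reassembles them afterwards: for the non-conjugated case, temp_r = sum(a_r*x_r - a_i*x_i) and temp_i = sum(a_r*x_i + a_i*x_r), with signs flipped under CONJ/XCONJ. A plain scalar reference for what one column's accumulators must reduce to (hedged sketch with an invented name, interleaved re/im storage as in the kernel):

/* zdot_ref: complex dot product of one matrix column with x, before the
 * alpha scaling applied to y[]. n2 = 2*n doubles, stored as (re,im) pairs. */
static void zdot_ref(long n2, const double *a, const double *x,
                     double *temp_r, double *temp_i)
{
    double tr = 0.0, ti = 0.0;
    for (long i = 0; i < n2; i += 2) {
        /* (a_r + i*a_i) * (x_r + i*x_i); CONJ/XCONJ variants flip the signs */
        tr += a[i] * x[i]     - a[i + 1] * x[i + 1];
        ti += a[i] * x[i + 1] + a[i + 1] * x[i];
    }
    *temp_r = tr;
    *temp_i = ti;
}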

View File

@@ -501,7 +501,11 @@ CNAME(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float * __restrict__ A, f
 int32_t permil[16] = {0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3};
 BLASLONG n_count = n;
 float *a_pointer = A,*b_pointer = B,*c_pointer = C,*ctemp = C,*next_b = B;
+#if defined(__clang__)
+for(;n_count>23;n_count-=24) COMPUTE(24)
+#else
 for(;n_count>23;n_count-=24) COMPUTE_n24
+#endif
 for(;n_count>19;n_count-=20) COMPUTE(20)
 for(;n_count>15;n_count-=16) COMPUTE(16)
 for(;n_count>11;n_count-=12) COMPUTE(12)
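clang builds now take the generic COMPUTE(24) expansion instead of the hand-specialized COMPUTE_n24 macro; both are internal to this kernel, and the guard suggests clang cannot handle the specialized variant. The dispatch shape, as a self-contained sketch with stand-in macros (PROCESS and PROCESS_24 are mock names, not the kernel's):

#include <stdio.h>

#define PROCESS(w) { printf("generic %d-column block\n", (w)); }
#if defined(__clang__)
#define PROCESS_24() PROCESS(24)   /* clang: fall back to the generic path */
#else
#define PROCESS_24() { printf("specialized 24-column block\n"); }
#endif

/* Peel the column count in fixed-width blocks, widest first. */
void tile_columns(long n)
{
    for (; n > 23; n -= 24) PROCESS_24();
    for (; n > 19; n -= 20) PROCESS(20);
    for (; n > 15; n -= 16) PROCESS(16);
}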

View File

@@ -319,14 +319,14 @@
 REAL ABSB, ANORM, ASCALE, ATOL, BNORM, BSCALE, BTOL,
 $ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP
 COMPLEX ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2,
-$ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1,
+$ CTEMP3, ESHIFT, S, SHIFT, SIGNBC,
 $ U12, X, ABI12, Y
 * ..
 * .. External Functions ..
 COMPLEX CLADIV
 LOGICAL LSAME
 REAL CLANHS, SLAMCH
-EXTERNAL CLADIV, LLSAME, CLANHS, SLAMCH
+EXTERNAL CLADIV, LSAME, CLANHS, SLAMCH
 * ..
 * .. External Subroutines ..
 EXTERNAL CLARTG, CLASET, CROT, CSCAL, XERBLA
@@ -351,6 +351,7 @@
 ILSCHR = .TRUE.
 ISCHUR = 2
 ELSE
+ILSCHR = .TRUE.
 ISCHUR = 0
 END IF
 *
@@ -364,6 +365,7 @@
 ILQ = .TRUE.
 ICOMPQ = 3
 ELSE
+ILQ = .TRUE.
 ICOMPQ = 0
 END IF
 *
@@ -377,6 +379,7 @@
 ILZ = .TRUE.
 ICOMPZ = 3
 ELSE
+ILZ = .TRUE.
 ICOMPZ = 0
 END IF
 *
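Three fixes land in this routine (and its COMPLEX*16 twin below): the unused locals RTDISC and T1 leave the declarations, the misspelled external LLSAME becomes LSAME, and ILSCHR/ILQ/ILZ now receive explicit defaults in the trailing ELSE branches, so the later argument-error path never reads them uninitialized. The flag fix as a hedged C analogue (classify_job is an invented name):

#include <stdbool.h>

/* Give the flag a defined value on every path, including the
 * "unrecognized option" fall-through branch. Returns the ISCHUR code. */
static int classify_job(char job, bool *ilschr)
{
    if (job == 'E')      { *ilschr = false; return 1; }
    else if (job == 'S') { *ilschr = true;  return 2; }
    else                 { *ilschr = true;  return 0; }  /* was left unset before */
}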

View File

@@ -139,7 +139,7 @@
 * =====================================================================
 *
 * .. Parameters ..
-DOUBLE PRECISION ZERO, HALF, ONE
+DOUBLE PRECISION ZERO, HALF, ONE, TWO
 PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0,
 $ TWO = 2.0D0 )
 DOUBLE PRECISION MULTPL

View File

@@ -139,7 +139,7 @@
 * =====================================================================
 *
 * .. Parameters ..
-REAL ZERO, HALF, ONE
+REAL ZERO, HALF, ONE, TWO
 PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0,
 $ TWO = 2.0E+0 )
 REAL MULTPL

View File

@@ -319,7 +319,7 @@
 DOUBLE PRECISION ABSB, ANORM, ASCALE, ATOL, BNORM, BSCALE, BTOL,
 $ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP
 COMPLEX*16 ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2,
-$ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1,
+$ CTEMP3, ESHIFT, S, SHIFT, SIGNBC,
 $ U12, X, ABI12, Y
 * ..
 * .. External Functions ..
@@ -352,6 +352,7 @@
 ILSCHR = .TRUE.
 ISCHUR = 2
 ELSE
+ILSCHR = .TRUE.
 ISCHUR = 0
 END IF
 *
@@ -365,6 +366,7 @@
 ILQ = .TRUE.
 ICOMPQ = 3
 ELSE
+ILQ = .TRUE.
 ICOMPQ = 0
 END IF
 *
@@ -378,6 +380,7 @@
 ILZ = .TRUE.
 ICOMPZ = 3
 ELSE
+ILZ = .TRUE.
 ICOMPZ = 0
 END IF
 *
* *