From 04d51536a4c01a4bdb4ff96b2ab8ed0efcbd5ebd Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sun, 1 Dec 2013 13:49:59 +0100 Subject: [PATCH 1/2] changed level3.c --- driver/level3/level3.c | 4 +--- driver/level3/level3_thread.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/driver/level3/level3.c b/driver/level3/level3.c index 2fe889527..5f746642c 100644 --- a/driver/level3/level3.c +++ b/driver/level3/level3.c @@ -333,9 +333,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, for(jjs = js; jjs < js + min_j; jjs += min_jj){ min_jj = min_j + js - jjs; -#if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX) - if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N; - else +#if ( defined(BULLDOZER) || defined(PILEDRIVER) || defined(HASWELL) ) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX) if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; else if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index 3242790fa..ee1a8db7c 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -367,9 +367,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){ min_jj = MIN(n_to, xxx + div_n) - jjs; -#if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX) - if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N; - else +#if ( defined(BULLDOZER) || defined(PILEDRIVER) || defined(HASWELL) ) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX) if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; else if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; From 2a625447ead3e3c89f542fe23a3821b4b04163f4 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sun, 1 Dec 2013 16:11:13 +0100 Subject: [PATCH 2/2] modified common.h --- common.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/common.h b/common.h index 309f246e2..4e3230d7e 100644 --- a/common.h +++ b/common.h @@ -310,6 +310,15 @@ typedef int blasint; #define YIELDING SwitchToThread() #endif +#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) +#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n"); +#endif + +#ifdef PILEDRIVER +#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n"); +#endif + + #ifndef YIELDING #define YIELDING sched_yield() #endif @@ -363,6 +372,15 @@ please https://github.com/xianyi/OpenBLAS/issues/246 #include "common_mips64.h" #endif +#ifdef ARCH_ARM +#include "common_arm.h" +#endif + +#ifdef ARCH_ARM64 +#include "common_arm64.h" +#endif + + #ifdef OS_LINUX #include "common_linux.h" #endif