Merge pull request #3399 from martin-frbg/issue2814
Improve performance on Apple M1 Vortex
This commit is contained in:
commit
b54b50fe3a
|
@@ -1 +1 @@
|
||||||
include $(KERNELDIR)/KERNEL.ARMV8
|
include $(KERNELDIR)/KERNEL.NEOVERSEN1
|
||||||
|
|
4
param.h
4
param.h
|
@@ -2972,7 +2972,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#if defined(CORTEXA57) || \
|
#if defined(CORTEXA57) || \
|
||||||
defined(CORTEXA72) || defined(CORTEXA73) || \
|
defined(CORTEXA72) || defined(CORTEXA73) || \
|
||||||
defined(FALKOR) || defined(TSV110) || defined(EMAG8180)
|
defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
@@ -2989,7 +2989,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
/*FIXME: this should be using the cache size, but there is currently no easy way to
|
/*FIXME: this should be using the cache size, but there is currently no easy way to
|
||||||
query that on ARM. So if getarch counted more than 8 cores we simply assume the host
|
query that on ARM. So if getarch counted more than 8 cores we simply assume the host
|
||||||
is a big desktop or server with abundant cache rather than a phone or embedded device */
|
is a big desktop or server with abundant cache rather than a phone or embedded device */
|
||||||
#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180)
|
#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
|
||||||
#define SGEMM_DEFAULT_P 512
|
#define SGEMM_DEFAULT_P 512
|
||||||
#define DGEMM_DEFAULT_P 256
|
#define DGEMM_DEFAULT_P 256
|
||||||
#define CGEMM_DEFAULT_P 256
|
#define CGEMM_DEFAULT_P 256
|
||||||
|
|
Loading…
Reference in New Issue