tests and code cleanup of gemm_kernels for HASWELL
This commit is contained in:
parent
a77c71eaf5
commit
afe44b0241
|
@@ -1,61 +1,51 @@
|
|||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
/*********************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************
|
||||
* 2013/10/19 Saar
|
||||
* BLASTEST :
|
||||
* 2013/10/28 Saar
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
* 2013/08/16 Saar
|
||||
* 2013/10/28 Saar
|
||||
* Parameter:
|
||||
* CGEMM_DEFAULT_UNROLL_N 2
|
||||
* CGEMM_DEFAULT_UNROLL_M 8
|
||||
* CGEMM_DEFAULT_P 224
|
||||
* CGEMM_DEFAULT_Q 224
|
||||
* CGEMM_DEFAULT_P 384
|
||||
* CGEMM_DEFAULT_Q 192
|
||||
* A_PR1 512
|
||||
* B_PR1 512
|
||||
*
|
||||
* BLASTEST: OK
|
||||
* Performance at 6912x6912x6912:
|
||||
* 1 thread: 84 GFLOPS (SANDYBRIDGE: 60) (MKL: 86)
|
||||
* 2 threads: 153 GFLOPS (SANDYBRIDGE: 114) (MKL: 155)
|
||||
* 3 threads: 224 GFLOPS (SANDYBRIDGE: 162) (MKL: 222)
|
||||
* 4 threads: 278 GFLOPS (SANDYBRIDGE: 223) (MKL: 279)
|
||||
*
|
||||
* Performance:
|
||||
* 1 thread: 2.04 times faster than sandybridge
|
||||
* 4 threads: 1.96 times faster than sandybridge
|
||||
*
|
||||
* Compile for FMA3: OK
|
||||
*
|
||||
*********************************************************************/
|
||||
|
||||
|
@@ -235,8 +225,8 @@
|
|||
#endif
|
||||
|
||||
|
||||
#define A_PR1 384
|
||||
#define B_PR1 192
|
||||
#define A_PR1 512
|
||||
#define B_PR1 512
|
||||
|
||||
/***************************************************************************************************************************/
|
||||
|
||||
|
@@ -338,6 +328,9 @@
|
|||
vmovups %ymm10 , (CO1, LDC)
|
||||
vmovups %ymm14 , 8 * SIZE(CO1, LDC)
|
||||
|
||||
prefetcht0 64(CO1)
|
||||
prefetcht0 64(CO1, LDC)
|
||||
|
||||
.endm
|
||||
|
||||
/***************************************************************************************************************************/
|
||||
|
|
|
@@ -1,61 +1,51 @@
|
|||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
/*********************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************
|
||||
* 2013/10/19 Saar
|
||||
* BLASTEST :
|
||||
* 2013/10/28 Saar
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
* 2013/08/15 Saar
|
||||
* 2013/10/28 Saar
|
||||
* Parameter:
|
||||
* SGEMM_DEFAULT_UNROLL_N 4
|
||||
* SGEMM_DEFAULT_UNROLL_M 16
|
||||
* SGEMM_DEFAULT_P 768
|
||||
* SGEMM_DEFAULT_Q 168
|
||||
* SGEMM_DEFAULT_Q 384
|
||||
* A_PR1 512
|
||||
* B_PR1 512
|
||||
*
|
||||
* BLASTEST: OK
|
||||
*
|
||||
* Performance:
|
||||
* 1 thread: 2.22 times faster than sandybridge
|
||||
* 4 threads: 2.26 times faster than sandybridge
|
||||
*
|
||||
* Compile for FMA3: OK
|
||||
* Performance at 9216x9216x9216:
|
||||
* 1 thread: 86 GFLOPS (SANDYBRIDGE: 59) (MKL: 83)
|
||||
* 2 threads: 157 GFLOPS (SANDYBRIDGE: 116) (MKL: 155)
|
||||
* 3 threads: 235 GFLOPS (SANDYBRIDGE: 165) (MKL: 230)
|
||||
* 4 threads: 288 GFLOPS (SANDYBRIDGE: 223) (MKL: 267)
|
||||
*
|
||||
*********************************************************************/
|
||||
|
||||
|
@@ -162,8 +152,8 @@
|
|||
#endif
|
||||
|
||||
|
||||
#define A_PR1 384
|
||||
#define B_PR1 192
|
||||
#define A_PR1 512
|
||||
#define B_PR1 512
|
||||
|
||||
/*******************************************************************************************
|
||||
* 4 lines of N
|
||||
|
@@ -230,6 +220,11 @@
|
|||
vmovups %ymm10, (CO2, LDC)
|
||||
vmovups %ymm11, 8 * SIZE(CO2, LDC)
|
||||
|
||||
prefetcht0 64(CO1)
|
||||
prefetcht0 64(CO1, LDC)
|
||||
prefetcht0 64(CO2)
|
||||
prefetcht0 64(CO2, LDC)
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
|
|
@@ -1,62 +1,53 @@
|
|||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
/*********************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************
|
||||
* 2013/10/19 Saar
|
||||
* BLASTEST :
|
||||
/********************************************************************************
|
||||
* 2013/10/28 Saar
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
* 2013/08/16 Saar
|
||||
* 2013/10/28 Saar
|
||||
* Parameter:
|
||||
* ZGEMM_DEFAULT_UNROLL_N 2
|
||||
* ZGEMM_DEFAULT_UNROLL_M 4
|
||||
* ZGEMM_DEFAULT_P 112
|
||||
* ZGEMM_DEFAULT_Q 224
|
||||
* ZGEMM_DEFAULT_P 256
|
||||
* ZGEMM_DEFAULT_Q 128
|
||||
* A_PR1 512
|
||||
* B_PR1 512
|
||||
*
|
||||
*
|
||||
* Performance:
|
||||
* 1 thread: 1.80 times faster than sandybridge
|
||||
* 4 threads: 1.74 times faster than sandybridge
|
||||
* Performance at 4608x4608x4608:
|
||||
* 1 thread: 43 GFLOPS (SANDYBRIDGE: 29) (MKL: 53)
|
||||
* 2 threads: 85 GFLOPS (SANDYBRIDGE: 59) (MKL: 100)
|
||||
* 3 threads: 122 GFLOPS (SANDYBRIDGE: 86) (MKL: 138)
|
||||
* 4 threads: 156 GFLOPS (SANDYBRIDGE: 108) (MKL: 172)
|
||||
*
|
||||
* Compile for FMA3: OK
|
||||
*
|
||||
*********************************************************************/
|
||||
********************************************************************************/
|
||||
|
||||
|
||||
#define ASSEMBLER
|
||||
|
@@ -232,8 +223,8 @@
|
|||
|
||||
#endif
|
||||
|
||||
#define A_PR1 384
|
||||
#define B_PR1 192
|
||||
#define A_PR1 512
|
||||
#define B_PR1 512
|
||||
/***************************************************************************************************/
|
||||
|
||||
.macro KERNEL4x2_SUB
|
||||
|
@@ -335,7 +326,8 @@
|
|||
vmovups %ymm10 , (CO1, LDC)
|
||||
vmovups %ymm14 , 4 * SIZE(CO1, LDC)
|
||||
|
||||
|
||||
prefetcht0 64(CO1)
|
||||
prefetcht0 64(CO1, LDC)
|
||||
|
||||
.endm
|
||||
|
||||
|
|
8
param.h
8
param.h
|
@@ -1223,12 +1223,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define SGEMM_DEFAULT_P 768
|
||||
#define DGEMM_DEFAULT_P 512
|
||||
#define CGEMM_DEFAULT_P 384
|
||||
#define ZGEMM_DEFAULT_P 192
|
||||
#define ZGEMM_DEFAULT_P 256
|
||||
|
||||
#define SGEMM_DEFAULT_Q 168
|
||||
#define SGEMM_DEFAULT_Q 384
|
||||
#define DGEMM_DEFAULT_Q 256
|
||||
#define CGEMM_DEFAULT_Q 168
|
||||
#define ZGEMM_DEFAULT_Q 168
|
||||
#define CGEMM_DEFAULT_Q 192
|
||||
#define ZGEMM_DEFAULT_Q 128
|
||||
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
//#define DGEMM_DEFAULT_R dgemm_r
|
||||
|
|
Loading…
Reference in New Issue