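Tests and code cleanup of the CGEMM, SGEMM and ZGEMM kernels for HASWELL: A_PR1/B_PR1 prefetch distances raised to 512, P/Q blocking parameters retuned, and prefetcht0 hints added after the final stores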

This commit is contained in:
wernsaar 2013-10-28 14:23:48 +01:00
parent a77c71eaf5
commit afe44b0241
4 changed files with 135 additions and 155 deletions

View File

@ -1,61 +1,51 @@
/*********************************************************************/ /*********************************************************************************
/* Copyright 2009, 2010 The University of Texas at Austin. */ Copyright (c) 2013, The OpenBLAS Project
/* All rights reserved. */ All rights reserved.
/* */ Redistribution and use in source and binary forms, with or without
/* Redistribution and use in source and binary forms, with or */ modification, are permitted provided that the following conditions are
/* without modification, are permitted provided that the following */ met:
/* conditions are met: */ 1. Redistributions of source code must retain the above copyright
/* */ notice, this list of conditions and the following disclaimer.
/* 1. Redistributions of source code must retain the above */ 2. Redistributions in binary form must reproduce the above copyright
/* copyright notice, this list of conditions and the following */ notice, this list of conditions and the following disclaimer in
/* disclaimer. */ the documentation and/or other materials provided with the
/* */ distribution.
/* 2. Redistributions in binary form must reproduce the above */ 3. Neither the name of the OpenBLAS project nor the names of
/* copyright notice, this list of conditions and the following */ its contributors may be used to endorse or promote products
/* disclaimer in the documentation and/or other materials */ derived from this software without specific prior written permission.
/* provided with the distribution. */ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
/* */ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ **********************************************************************************/
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
/********************************************************************* /*********************************************************************
* 2013/10/19 Saar * 2013/10/28 Saar
* BLASTEST : * BLASTEST : OK
* CTEST : OK * CTEST : OK
* TEST : OK * TEST : OK
* *
* 2013/08/16 Saar * 2013/10/28 Saar
* Parameter: * Parameter:
* CGEMM_DEFAULT_UNROLL_N 2 * CGEMM_DEFAULT_UNROLL_N 2
* CGEMM_DEFAULT_UNROLL_M 8 * CGEMM_DEFAULT_UNROLL_M 8
* CGEMM_DEFAULT_P 224 * CGEMM_DEFAULT_P 384
* CGEMM_DEFAULT_Q 224 * CGEMM_DEFAULT_Q 192
* A_PR1 512
* B_PR1 512
* *
* BLASTEST: OK * Performance at 6912x6912x6912:
* 1 thread: 84 GFLOPS (SANDYBRIDGE: 60) (MKL: 86)
* 2 threads: 153 GFLOPS (SANDYBRIDGE: 114) (MKL: 155)
* 3 threads: 224 GFLOPS (SANDYBRIDGE: 162) (MKL: 222)
* 4 threads: 278 GFLOPS (SANDYBRIDGE: 223) (MKL: 279)
* *
* Performance:
* 1 thread: 2.04 times faster than sandybridge
* 4 threads: 1.96 times faster than sandybridge
*
* Compile for FMA3: OK
* *
*********************************************************************/ *********************************************************************/
@ -235,8 +225,8 @@
#endif #endif
#define A_PR1 384 #define A_PR1 512
#define B_PR1 192 #define B_PR1 512
/***************************************************************************************************************************/ /***************************************************************************************************************************/
@ -338,6 +328,9 @@
vmovups %ymm10 , (CO1, LDC) vmovups %ymm10 , (CO1, LDC)
vmovups %ymm14 , 8 * SIZE(CO1, LDC) vmovups %ymm14 , 8 * SIZE(CO1, LDC)
prefetcht0 64(CO1)
prefetcht0 64(CO1, LDC)
.endm .endm
/***************************************************************************************************************************/ /***************************************************************************************************************************/

View File

@ -1,61 +1,51 @@
/*********************************************************************/ /*********************************************************************************
/* Copyright 2009, 2010 The University of Texas at Austin. */ Copyright (c) 2013, The OpenBLAS Project
/* All rights reserved. */ All rights reserved.
/* */ Redistribution and use in source and binary forms, with or without
/* Redistribution and use in source and binary forms, with or */ modification, are permitted provided that the following conditions are
/* without modification, are permitted provided that the following */ met:
/* conditions are met: */ 1. Redistributions of source code must retain the above copyright
/* */ notice, this list of conditions and the following disclaimer.
/* 1. Redistributions of source code must retain the above */ 2. Redistributions in binary form must reproduce the above copyright
/* copyright notice, this list of conditions and the following */ notice, this list of conditions and the following disclaimer in
/* disclaimer. */ the documentation and/or other materials provided with the
/* */ distribution.
/* 2. Redistributions in binary form must reproduce the above */ 3. Neither the name of the OpenBLAS project nor the names of
/* copyright notice, this list of conditions and the following */ its contributors may be used to endorse or promote products
/* disclaimer in the documentation and/or other materials */ derived from this software without specific prior written permission.
/* provided with the distribution. */ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
/* */ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ **********************************************************************************/
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
/********************************************************************* /*********************************************************************
* 2013/10/19 Saar * 2013/10/28 Saar
* BLASTEST : * BLASTEST : OK
* CTEST : OK * CTEST : OK
* TEST : OK * TEST : OK
* *
* 2013/08/15 Saar * 2013/10/28 Saar
* Parameter: * Parameter:
* SGEMM_DEFAULT_UNROLL_N 4 * SGEMM_DEFAULT_UNROLL_N 4
* SGEMM_DEFAULT_UNROLL_M 16 * SGEMM_DEFAULT_UNROLL_M 16
* SGEMM_DEFAULT_P 768 * SGEMM_DEFAULT_P 768
* SGEMM_DEFAULT_Q 168 * SGEMM_DEFAULT_Q 384
* A_PR1 512
* B_PR1 512
* *
* BLASTEST: OK
* *
* Performance: * Performance at 9216x9216x9216:
* 1 thread: 2.22 times faster than sandybridge * 1 thread: 86 GFLOPS (SANDYBRIDGE: 59) (MKL: 83)
* 4 threads: 2.26 times faster than sandybridge * 2 threads: 157 GFLOPS (SANDYBRIDGE: 116) (MKL: 155)
* * 3 threads: 235 GFLOPS (SANDYBRIDGE: 165) (MKL: 230)
* Compile for FMA3: OK * 4 threads: 288 GFLOPS (SANDYBRIDGE: 223) (MKL: 267)
* *
*********************************************************************/ *********************************************************************/
@ -162,8 +152,8 @@
#endif #endif
#define A_PR1 384 #define A_PR1 512
#define B_PR1 192 #define B_PR1 512
/******************************************************************************************* /*******************************************************************************************
* 4 lines of N * 4 lines of N
@ -230,6 +220,11 @@
vmovups %ymm10, (CO2, LDC) vmovups %ymm10, (CO2, LDC)
vmovups %ymm11, 8 * SIZE(CO2, LDC) vmovups %ymm11, 8 * SIZE(CO2, LDC)
prefetcht0 64(CO1)
prefetcht0 64(CO1, LDC)
prefetcht0 64(CO2)
prefetcht0 64(CO2, LDC)
.endm .endm

View File

@ -1,62 +1,53 @@
/*********************************************************************/ /*********************************************************************************
/* Copyright 2009, 2010 The University of Texas at Austin. */ Copyright (c) 2013, The OpenBLAS Project
/* All rights reserved. */ All rights reserved.
/* */ Redistribution and use in source and binary forms, with or without
/* Redistribution and use in source and binary forms, with or */ modification, are permitted provided that the following conditions are
/* without modification, are permitted provided that the following */ met:
/* conditions are met: */ 1. Redistributions of source code must retain the above copyright
/* */ notice, this list of conditions and the following disclaimer.
/* 1. Redistributions of source code must retain the above */ 2. Redistributions in binary form must reproduce the above copyright
/* copyright notice, this list of conditions and the following */ notice, this list of conditions and the following disclaimer in
/* disclaimer. */ the documentation and/or other materials provided with the
/* */ distribution.
/* 2. Redistributions in binary form must reproduce the above */ 3. Neither the name of the OpenBLAS project nor the names of
/* copyright notice, this list of conditions and the following */ its contributors may be used to endorse or promote products
/* disclaimer in the documentation and/or other materials */ derived from this software without specific prior written permission.
/* provided with the distribution. */ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
/* */ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ **********************************************************************************/
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
/********************************************************************* /********************************************************************************
* 2013/10/19 Saar * 2013/10/28 Saar
* BLASTEST : * BLASTEST : OK
* CTEST : OK * CTEST : OK
* TEST : OK * TEST : OK
* *
* 2013/08/16 Saar * 2013/10/28 Saar
* Parameter: * Parameter:
* ZGEMM_DEFAULT_UNROLL_N 2 * ZGEMM_DEFAULT_UNROLL_N 2
* ZGEMM_DEFAULT_UNROLL_M 4 * ZGEMM_DEFAULT_UNROLL_M 4
* ZGEMM_DEFAULT_P 112 * ZGEMM_DEFAULT_P 256
* ZGEMM_DEFAULT_Q 224 * ZGEMM_DEFAULT_Q 128
* A_PR1 512
* B_PR1 512
* *
* *
* Performance: * Performance at 4608x4608x4608:
* 1 thread: 1.80 times faster than sandybridge * 1 thread: 43 GFLOPS (SANDYBRIDGE: 29) (MKL: 53)
* 4 threads: 1.74 times faster than sandybridge * 2 threads: 85 GFLOPS (SANDYBRIDGE: 59) (MKL: 100)
* 3 threads: 122 GFLOPS (SANDYBRIDGE: 86) (MKL: 138)
* 4 threads: 156 GFLOPS (SANDYBRIDGE: 108) (MKL: 172)
* *
* Compile for FMA3: OK ********************************************************************************/
*
*********************************************************************/
#define ASSEMBLER #define ASSEMBLER
@ -232,8 +223,8 @@
#endif #endif
#define A_PR1 384 #define A_PR1 512
#define B_PR1 192 #define B_PR1 512
/***************************************************************************************************/ /***************************************************************************************************/
.macro KERNEL4x2_SUB .macro KERNEL4x2_SUB
@ -335,7 +326,8 @@
vmovups %ymm10 , (CO1, LDC) vmovups %ymm10 , (CO1, LDC)
vmovups %ymm14 , 4 * SIZE(CO1, LDC) vmovups %ymm14 , 4 * SIZE(CO1, LDC)
prefetcht0 64(CO1)
prefetcht0 64(CO1, LDC)
.endm .endm

View File

@ -1223,12 +1223,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SGEMM_DEFAULT_P 768 #define SGEMM_DEFAULT_P 768
#define DGEMM_DEFAULT_P 512 #define DGEMM_DEFAULT_P 512
#define CGEMM_DEFAULT_P 384 #define CGEMM_DEFAULT_P 384
#define ZGEMM_DEFAULT_P 192 #define ZGEMM_DEFAULT_P 256
#define SGEMM_DEFAULT_Q 168 #define SGEMM_DEFAULT_Q 384
#define DGEMM_DEFAULT_Q 256 #define DGEMM_DEFAULT_Q 256
#define CGEMM_DEFAULT_Q 168 #define CGEMM_DEFAULT_Q 192
#define ZGEMM_DEFAULT_Q 168 #define ZGEMM_DEFAULT_Q 128
#define SGEMM_DEFAULT_R sgemm_r #define SGEMM_DEFAULT_R sgemm_r
//#define DGEMM_DEFAULT_R dgemm_r //#define DGEMM_DEFAULT_R dgemm_r