minor optimizations on zgemm_kernel for ARMV7
This commit is contained in:
parent
02bc36ac79
commit
b3eab8fcb7
|
@ -26,11 +26,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
/**************************************************************************************
|
/**************************************************************************************
|
||||||
* 2013/10/16 Saar
|
* 2013/11/02 Saar
|
||||||
* BLASTEST : OK
|
* BLASTEST : OK
|
||||||
* CTEST : OK
|
* CTEST : OK
|
||||||
* TEST : OK
|
* TEST : OK
|
||||||
*
|
*
|
||||||
|
* 2013/11/02 Saar
|
||||||
|
* UNROLL_N 2
|
||||||
|
* UNROLL_M 2
|
||||||
|
* ZGEMM_P 64
|
||||||
|
* ZGEMM_Q 120
|
||||||
|
* ZGEMM_R 4096
|
||||||
|
* A_PRE 96
|
||||||
|
* B_PRE 96
|
||||||
|
* C_PRE 64
|
||||||
|
*
|
||||||
|
* Performance on Odroid U2:
|
||||||
|
*
|
||||||
|
* 1 Core: 1.62 GFLOPS ATLAS: 1.39 GFLOPS
|
||||||
|
* 2 Cores: 3.20 GFLOPS ATLAS: 2.54 GFLOPS
|
||||||
|
* 3 Cores: 4.72 GFLOPS ATLAS: 3.76 GFLOPS
|
||||||
|
* 4 Cores: 5.93 GFLOPS ATLAS: 4.88 GFLOPS
|
||||||
**************************************************************************************/
|
**************************************************************************************/
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
|
|
6
param.h
6
param.h
|
@ -1817,17 +1817,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SGEMM_DEFAULT_P 192
|
#define SGEMM_DEFAULT_P 192
|
||||||
#define DGEMM_DEFAULT_P 128
|
#define DGEMM_DEFAULT_P 128
|
||||||
#define CGEMM_DEFAULT_P 96
|
#define CGEMM_DEFAULT_P 96
|
||||||
#define ZGEMM_DEFAULT_P 20
|
#define ZGEMM_DEFAULT_P 64
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_Q 120
|
#define SGEMM_DEFAULT_Q 120
|
||||||
#define DGEMM_DEFAULT_Q 120
|
#define DGEMM_DEFAULT_Q 120
|
||||||
#define CGEMM_DEFAULT_Q 120
|
#define CGEMM_DEFAULT_Q 120
|
||||||
#define ZGEMM_DEFAULT_Q 64
|
#define ZGEMM_DEFAULT_Q 120
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_R 16384
|
#define SGEMM_DEFAULT_R 16384
|
||||||
#define DGEMM_DEFAULT_R 8192
|
#define DGEMM_DEFAULT_R 8192
|
||||||
#define CGEMM_DEFAULT_R 4096
|
#define CGEMM_DEFAULT_R 4096
|
||||||
#define ZGEMM_DEFAULT_R 512
|
#define ZGEMM_DEFAULT_R 4096
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue