minor optimizations on zgemm_kernel for ARMV7

This commit is contained in:
wernsaar 2013-11-02 09:43:53 +01:00
parent 02bc36ac79
commit b3eab8fcb7
2 changed files with 20 additions and 4 deletions

View File

@@ -26,11 +26,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
/**************************************************************************************
* 2013/10/16 Saar
* 2013/11/02 Saar
* BLASTEST : OK
* CTEST : OK
* TEST : OK
*
* 2013/11/02 Saar
* UNROLL_N 2
* UNROLL_M 2
* ZGEMM_P 64
* ZGEMM_Q 120
* ZGEMM_R 4096
* A_PRE 96
* B_PRE 96
* C_PRE 64
*
* Performance on Odroid U2:
*
* 1 Core: 1.62 GFLOPS ATLAS: 1.39 GFLOPS
* 2 Cores: 3.20 GFLOPS ATLAS: 2.54 GFLOPS
* 3 Cores: 4.72 GFLOPS ATLAS: 3.76 GFLOPS
* 4 Cores: 5.93 GFLOPS ATLAS: 4.88 GFLOPS
**************************************************************************************/
#define ASSEMBLER

View File

@@ -1817,17 +1817,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*
 * Default GEMM tuning parameters (P, Q, R) per precision prefix: S, D, C, Z.
 *
 * NOTE(review): this is a diff hunk whose +/- markers were lost when the page
 * was rendered as text. Each ZGEMM_DEFAULT_{P,Q,R} therefore appears twice.
 * The first line of each pair is presumably the value removed by the commit
 * and the second the value added: 64 / 120 / 4096 match the ZGEMM_P, ZGEMM_Q
 * and ZGEMM_R figures recorded in the kernel header comment of this commit.
 * Only one definition per macro should exist in the real header; keeping both
 * lines would redefine the macro with a different value.
 * The SGEMM/DGEMM/CGEMM lines are unchanged context.
 */
#define SGEMM_DEFAULT_P 192
#define DGEMM_DEFAULT_P 128
#define CGEMM_DEFAULT_P 96
#define ZGEMM_DEFAULT_P 20 /* presumably the pre-commit value (removed line) */
#define ZGEMM_DEFAULT_P 64 /* presumably the post-commit value (added line) */
#define SGEMM_DEFAULT_Q 120
#define DGEMM_DEFAULT_Q 120
#define CGEMM_DEFAULT_Q 120
#define ZGEMM_DEFAULT_Q 64 /* presumably the pre-commit value (removed line) */
#define ZGEMM_DEFAULT_Q 120 /* presumably the post-commit value (added line) */
#define SGEMM_DEFAULT_R 16384
#define DGEMM_DEFAULT_R 8192
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 512 /* presumably the pre-commit value (removed line) */
#define ZGEMM_DEFAULT_R 4096 /* presumably the post-commit value (added line) */