POWER10: Fall back to POWER8 functions
As the cgemm and zgemm kernels are not optimized for big endian, fall back to the POWER8 versions. Tested on AIX using gcc and Open XL C.
This commit is contained in:
		
							parent
							
								
									bf3183d31d
								
							
						
					
					
						commit
						82fc29a57a
					
				|  | @ -19,8 +19,13 @@ SBGEMMOTCOPYOBJ =  sbgemm_otcopy$(TSUFFIX).$(SUFFIX) | ||||||
| 
 | 
 | ||||||
| STRMMKERNEL	= sgemm_kernel_power10.c | STRMMKERNEL	= sgemm_kernel_power10.c | ||||||
| DTRMMKERNEL	= dgemm_kernel_power10.c | DTRMMKERNEL	= dgemm_kernel_power10.c | ||||||
|  | ifeq ($(OSNAME), AIX) | ||||||
|  | CTRMMKERNEL     = ctrmm_kernel_8x4_power8.S | ||||||
|  | ZTRMMKERNEL     = ztrmm_kernel_8x2_power8.S | ||||||
|  | else | ||||||
| CTRMMKERNEL	= cgemm_kernel_power10.S | CTRMMKERNEL	= cgemm_kernel_power10.S | ||||||
| ZTRMMKERNEL	= zgemm_kernel_power10.S | ZTRMMKERNEL	= zgemm_kernel_power10.S | ||||||
|  | endif | ||||||
| 
 | 
 | ||||||
| SGEMMKERNEL    =  sgemm_kernel_power10.c | SGEMMKERNEL    =  sgemm_kernel_power10.c | ||||||
| SGEMMINCOPY    = ../generic/gemm_ncopy_16.c | SGEMMINCOPY    = ../generic/gemm_ncopy_16.c | ||||||
|  | @ -62,10 +67,18 @@ DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_power10.c | ||||||
| DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_power10.c | DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_power10.c | ||||||
| DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_power10.c | DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_power10.c | ||||||
| 
 | 
 | ||||||
|  | ifeq ($(OSNAME), AIX) | ||||||
|  | CGEMMKERNEL    = cgemm_kernel_8x4_power8.S | ||||||
|  | else | ||||||
| CGEMMKERNEL    = cgemm_kernel_power10.S | CGEMMKERNEL    = cgemm_kernel_power10.S | ||||||
|  | endif | ||||||
| #CGEMMKERNEL     = cgemm_kernel_8x4_power8.S | #CGEMMKERNEL     = cgemm_kernel_8x4_power8.S | ||||||
| CGEMMINCOPY    = ../generic/zgemm_ncopy_8.c | CGEMMINCOPY    = ../generic/zgemm_ncopy_8.c | ||||||
|  | ifeq ($(OSNAME), AIX) | ||||||
|  | CGEMMITCOPY    = cgemm_tcopy_8_power8.S | ||||||
|  | else | ||||||
| CGEMMITCOPY    = ../generic/zgemm_tcopy_8.c | CGEMMITCOPY    = ../generic/zgemm_tcopy_8.c | ||||||
|  | endif | ||||||
| CGEMMONCOPY    = ../generic/zgemm_ncopy_4.c | CGEMMONCOPY    = ../generic/zgemm_ncopy_4.c | ||||||
| CGEMMOTCOPY    = ../generic/zgemm_tcopy_4.c | CGEMMOTCOPY    = ../generic/zgemm_tcopy_4.c | ||||||
| CGEMMONCOPYOBJ =  cgemm_oncopy$(TSUFFIX).$(SUFFIX) | CGEMMONCOPYOBJ =  cgemm_oncopy$(TSUFFIX).$(SUFFIX) | ||||||
|  | @ -73,7 +86,11 @@ CGEMMOTCOPYOBJ =  cgemm_otcopy$(TSUFFIX).$(SUFFIX) | ||||||
| CGEMMINCOPYOBJ =  cgemm_incopy$(TSUFFIX).$(SUFFIX) | CGEMMINCOPYOBJ =  cgemm_incopy$(TSUFFIX).$(SUFFIX) | ||||||
| CGEMMITCOPYOBJ =  cgemm_itcopy$(TSUFFIX).$(SUFFIX) | CGEMMITCOPYOBJ =  cgemm_itcopy$(TSUFFIX).$(SUFFIX) | ||||||
| 
 | 
 | ||||||
|  | ifeq ($(OSNAME), AIX) | ||||||
|  | ZGEMMKERNEL    = zgemm_kernel_8x2_power8.S | ||||||
|  | else | ||||||
| ZGEMMKERNEL    = zgemm_kernel_power10.S | ZGEMMKERNEL    = zgemm_kernel_power10.S | ||||||
|  | endif | ||||||
| ZGEMMONCOPY    = ../generic/zgemm_ncopy_2.c | ZGEMMONCOPY    = ../generic/zgemm_ncopy_2.c | ||||||
| ZGEMMOTCOPY    = ../generic/zgemm_tcopy_2.c | ZGEMMOTCOPY    = ../generic/zgemm_tcopy_2.c | ||||||
| ZGEMMINCOPY    = ../generic/zgemm_ncopy_8.c | ZGEMMINCOPY    = ../generic/zgemm_ncopy_8.c | ||||||
|  | @ -124,6 +141,7 @@ ZTRSMKERNEL_RT	= ../generic/trsm_kernel_RT.c | ||||||
| #SMINKERNEL   = ../arm/min.c | #SMINKERNEL   = ../arm/min.c | ||||||
| #DMINKERNEL   = ../arm/min.c | #DMINKERNEL   = ../arm/min.c | ||||||
| # | # | ||||||
|  | ifeq ($(C_COMPILER), GCC) | ||||||
| ifneq ($(GCCVERSIONGTEQ9),1) | ifneq ($(GCCVERSIONGTEQ9),1) | ||||||
| ISAMAXKERNEL = isamax_power9.S | ISAMAXKERNEL = isamax_power9.S | ||||||
| else | else | ||||||
|  | @ -148,6 +166,15 @@ ICAMINKERNEL = icamin_power9.S | ||||||
| else | else | ||||||
| ICAMINKERNEL = icamin.c | ICAMINKERNEL = icamin.c | ||||||
| endif | endif | ||||||
|  | else | ||||||
|  | ISAMAXKERNEL = isamax.c | ||||||
|  | IDAMAXKERNEL = idamax.c | ||||||
|  | ICAMAXKERNEL = icamax.c | ||||||
|  | IZAMAXKERNEL = izamax.c | ||||||
|  | ISAMINKERNEL = isamin.c | ||||||
|  | IDAMINKERNEL = idamin.c | ||||||
|  | ICAMINKERNEL = icamin.c | ||||||
|  | endif | ||||||
| IZAMINKERNEL = izamin.c | IZAMINKERNEL = izamin.c | ||||||
| # | # | ||||||
| #ISMAXKERNEL  = ../arm/imax.c | #ISMAXKERNEL  = ../arm/imax.c | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue