diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 84dd949a4..c81f2bf25 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -91,6 +91,59 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE") GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE") + if (DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SASUMKERNEL}" "" "asum_k" false "" "" false "SINGLE") + if (DEFINED SMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${SMAXKERNEL}" "" "max_k" false "" "" false "SINGLE") + endif () + if (DEFINED SMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${SMINKERNEL}" "USE_MIN" "min_k" false "" "" false "SINGLE") + endif () + if (DEFINED ISMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${ISMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "SINGLE") + endif () + if (DEFINED ISMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${ISMAXKERNEL}" "" "i*max_k" false "" "" false "SINGLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${ISAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${ISAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SSCALKERNEL}" "" "scal_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SCOPYKERNEL}" "" "copy_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SSWAPKERNEL}" "" "swap_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SAXPYKERNEL}" "" "axpy_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE") 
+ GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" false "" "" false "SINGLE") + endif () + if (DEFINED BUILD_COMPLEX16 AND NOT DEFINED BUILD_DOUBLE) + GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k" false "" "" false "DOUBLE") + if (DEFINED DMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k" false "" "" false "DOUBLE") + endif () + if (DEFINED DMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "USE_MIN" "min_k" false "" "" false "DOUBLE") + endif () + if (DEFINED IDMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "DOUBLE") + endif () + if (DEFINED IDMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k" false "" "" false "DOUBLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") + endif () + # Makefile.L2 
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) @@ -124,7 +177,14 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) endif () endforeach () - + if (DEFINED BUILD_COMPLEX16 AND NOT DEFINED BUILD_DOUBLE) + GenerateNamedObjects("${KERNELDIR}/${DGEMVNKERNEL}" "" "gemv_n" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "DOUBLE") + endif () + if (DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") + endif () # Makefile.L3 set(USE_TRMM false) if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) OR (TARGET_CORE MATCHES COOPERLAKE)) @@ -159,6 +219,38 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) endif () GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) endforeach() + if (DEFINED BUILD_COMPLEX16 AND NOT DEFINED BUILD_DOUBLE) + GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false "DOUBLE") + if (DGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "${DGEMMINCOPYOBJ}" false "" "" true "DOUBLE") + endif () + if (DGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "${DGEMMITCOPYOBJ}" false "" "" true "DOUBLE") + endif () + if (DGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "${DGEMMONCOPYOBJ}" false "" "" true "DOUBLE") + endif () + if (DGEMMOTCOPY) + 
GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE") + endif () + if (DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SGEMMKERNEL}" "" "gemm_kernel" false "" "" false "SINGLE") + if (SGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMINCOPY}" "SINGLE" "${SGEMMINCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMITCOPY}" "SINGLE" "${SGEMMITCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMONCOPY}" "SINGLE" "${SGEMMONCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMOTCOPY}" "SINGLE" "${SGEMMOTCOPYOBJ}" false "" "" true "SINGLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${SGEMM_BETA}" "" "gemm_beta" false "" "" false "SINGLE") + endif () foreach (float_type ${FLOAT_TYPES}) string(SUBSTRING ${float_type} 0 1 float_char) diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index c43520310..550af86a6 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -114,6 +114,7 @@ gotoblas_t TABLE_NAME = { #endif #endif +#if defined( BUILD_SINGLE) || defined(BUILD_COMPLEX) 0, 0, 0, SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, #ifdef SGEMM_DEFAULT_UNROLL_MN @@ -121,7 +122,7 @@ gotoblas_t TABLE_NAME = { #else MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N), #endif - +#endif #ifdef HAVE_EXCLUSIVE_CACHE 1, @@ -129,19 +130,34 @@ gotoblas_t TABLE_NAME = { 0, #endif +#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) samax_kTS, samin_kTS, smax_kTS, smin_kTS, isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS, - snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS, - dsdot_kTS, - srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS, - sgemv_nTS, sgemv_tTS, sger_kTS, + snrm2_kTS, sasum_kTS, 
+#endif +#ifdef BUILD_SINGLE + ssum_kTS, +#endif + +#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) + scopy_kTS, sdot_kTS, +// dsdot_kTS, + srot_kTS, saxpy_kTS, + sscal_kTS, + sswap_kTS, + sgemv_nTS, sgemv_tTS, +#endif +#ifdef BUILD_SINGLE + sger_kTS, ssymv_LTS, ssymv_UTS, #ifdef ARCH_X86_64 sgemm_directTS, sgemm_direct_performantTS, #endif - +#endif + +#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) sgemm_kernelTS, sgemm_betaTS, #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N sgemm_incopyTS, sgemm_itcopyTS, @@ -149,6 +165,9 @@ gotoblas_t TABLE_NAME = { sgemm_oncopyTS, sgemm_otcopyTS, #endif sgemm_oncopyTS, sgemm_otcopyTS, +#endif + +#ifdef BUILD_SINGLE strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS, #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS, @@ -182,6 +201,9 @@ gotoblas_t TABLE_NAME = { NULL,NULL, #endif +#endif + +#if defined (BUILD_DOUBLE) || defined(BUILD_COMPLEX16) 0, 0, 0, DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, #ifdef DGEMM_DEFAULT_UNROLL_MN @@ -189,14 +211,36 @@ gotoblas_t TABLE_NAME = { #else MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), #endif +#endif + +#if defined (BUILD_DOUBLE) || defined(BUILD_COMPLEX16) damax_kTS, damin_kTS, dmax_kTS, dmin_kTS, idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS, - dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS, - drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS, - dgemv_nTS, dgemv_tTS, dger_kTS, + dnrm2_kTS, dasum_kTS, +#endif +#if defined (BUILD_DOUBLE) + dsum_kTS, +#endif +#if defined (BUILD_DOUBLE) || defined(BUILD_COMPLEX16) + dcopy_kTS, ddot_kTS, +#endif +#if defined (BUILD_SINGLE) || defined(BUILD_DOUBLE) + dsdot_kTS, +#endif +#if defined (BUILD_DOUBLE) || defined(BUILD_COMPLEX16) + drot_kTS, + daxpy_kTS, + dscal_kTS, + dswap_kTS, + dgemv_nTS, dgemv_tTS, +#endif +#if defined (BUILD_DOUBLE) + dger_kTS, dsymv_LTS, dsymv_UTS, +#endif +#if defined (BUILD_DOUBLE) || 
defined(BUILD_COMPLEX16) dgemm_kernelTS, dgemm_betaTS, #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N dgemm_incopyTS, dgemm_itcopyTS, @@ -204,6 +248,9 @@ gotoblas_t TABLE_NAME = { dgemm_oncopyTS, dgemm_otcopyTS, #endif dgemm_oncopyTS, dgemm_otcopyTS, +#endif + +#if defined (BUILD_DOUBLE) dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS, #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS, @@ -237,6 +284,8 @@ gotoblas_t TABLE_NAME = { NULL, NULL, #endif +#endif + #ifdef EXPRECISION 0, 0, 0, @@ -291,6 +340,7 @@ gotoblas_t TABLE_NAME = { #endif +#ifdef BUILD_COMPLEX 0, 0, 0, CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, #ifdef CGEMM_DEFAULT_UNROLL_MN @@ -426,6 +476,9 @@ gotoblas_t TABLE_NAME = { NULL, NULL, #endif +#endif + +#ifdef BUILD_COMPLEX16 0, 0, 0, ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, #ifdef ZGEMM_DEFAULT_UNROLL_MN @@ -560,6 +613,8 @@ gotoblas_t TABLE_NAME = { NULL, NULL, #endif +#endif + #ifdef EXPRECISION 0, 0, 0, @@ -691,25 +746,61 @@ gotoblas_t TABLE_NAME = { init_parameter, SNUMOPT, DNUMOPT, QNUMOPT, +#ifdef BUILD_SINGLE + saxpby_kTS, +#endif +#ifdef BUILD_DOUBLE + daxpby_kTS, +#endif +#ifdef BUILD_COMPLEX + caxpby_kTS, +#endif +#ifdef BUILD_COMPLEX16 + zaxpby_kTS, +#endif - saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS, - +#ifdef BUILD_SINGLE somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS, +#endif +#ifdef BUILD_DOUBLE domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS, +#endif +#ifdef BUILD_COMPLEX comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS, comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS, +#endif +#ifdef BUILD_COMPLEX16 zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS, zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS, +#endif +#ifdef BUILD_SINGLE simatcopy_k_cnTS, simatcopy_k_ctTS, 
simatcopy_k_rnTS, simatcopy_k_rtTS, +#endif +#ifdef BUILD_DOUBLE dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS, +#endif +#ifdef BUILD_COMPLEX cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS, cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS, +#endif +#ifdef BUILD_COMPLEX16 zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS, zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS, +#endif - sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS - +#ifdef BUILD_SINGLE + sgeadd_kTS, +#endif +#ifdef BUILD_DOUBLE + dgeadd_kTS, +#endif +#ifdef BUILD_COMPLEX + cgeadd_kTS, +#endif +#ifdef BUILD_COMPLEX16 + zgeadd_kTS +#endif }; #if defined(ARCH_ARM64) @@ -717,26 +808,50 @@ static void init_parameter(void) { #if defined(BUILD_HALF) TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; #endif +#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #if defined(BUILD_HALF) TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; #endif +#ifdef BUILD_SINGLE TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; +#endif #if defined(BUILD_HALF) TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; #endif +#ifdef BUILD_SINGLE TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; @@ -989,22 +1104,34 @@ static void init_parameter(void) { 
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; #endif +#ifdef BUILD_SINGLE TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; +#endif +#ifdef BUILD_COMPLEX #ifdef CGEMM3M_DEFAULT_Q TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q; #else TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q; #endif +#endif +#ifdef BUILD_COMPLEX16 #ifdef ZGEMM3M_DEFAULT_Q TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q; #else TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q; #endif +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q; @@ -1018,10 +1145,18 @@ static void init_parameter(void) { fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = 64 * (l2 >> 7); +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = 32 * (l2 >> 7); +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = 32 * (l2 >> 7); +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = 16 * (l2 >> 7); +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 16 * (l2 >> 7); TABLE_NAME.xgemm_p = 8 * (l2 >> 7); @@ -1034,10 +1169,18 @@ static void init_parameter(void) { fprintf(stderr, "Northwood\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = 96 * (l2 >> 7); +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = 48 * (l2 >> 7); +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = 48 * (l2 >> 7); +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = 24 * (l2 >> 7); +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 24 * (l2 >> 7); TABLE_NAME.xgemm_p = 12 * (l2 >> 7); @@ -1050,10 +1193,18 @@ static void init_parameter(void) { fprintf(stderr, "Atom\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = 256; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = 128; +#endif +#ifdef BUILD_COMPLEX 
TABLE_NAME.cgemm_p = 128; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = 64; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 64; TABLE_NAME.xgemm_p = 32; @@ -1066,10 +1217,18 @@ static void init_parameter(void) { fprintf(stderr, "Prescott\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = 56 * (l2 >> 7); +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = 28 * (l2 >> 7); +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = 28 * (l2 >> 7); +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = 14 * (l2 >> 7); +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 14 * (l2 >> 7); TABLE_NAME.xgemm_p = 7 * (l2 >> 7); @@ -1082,10 +1241,18 @@ static void init_parameter(void) { fprintf(stderr, "Core2\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8; TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4; @@ -1098,10 +1265,18 @@ static void init_parameter(void) { fprintf(stderr, "Penryn\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; @@ -1114,10 +1289,18 @@ static void init_parameter(void) { fprintf(stderr, "Dunnington\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = 21 * (l2 
>> 9) + 4; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; @@ -1131,10 +1314,18 @@ static void init_parameter(void) { fprintf(stderr, "Nehalem\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1147,10 +1338,18 @@ static void init_parameter(void) { fprintf(stderr, "Sandybridge\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1163,10 +1362,18 @@ static void init_parameter(void) { fprintf(stderr, "Haswell\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1179,10 +1386,18 @@ static void init_parameter(void) { fprintf(stderr, "SkylakeX\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = 
CGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1196,10 +1411,18 @@ static void init_parameter(void) { fprintf(stderr, "Opteron\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7); +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7); +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7); +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7); +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7); TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7); @@ -1212,10 +1435,18 @@ static void init_parameter(void) { fprintf(stderr, "Barcelona\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1228,10 +1459,18 @@ static void init_parameter(void) { fprintf(stderr, "Bobcate\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1244,10 +1483,18 @@ static void init_parameter(void) { fprintf(stderr, "Bulldozer\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX 
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1260,10 +1507,18 @@ static void init_parameter(void) { fprintf(stderr, "Excavator\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1277,10 +1532,18 @@ static void init_parameter(void) { fprintf(stderr, "Piledriver\n"); #endif +#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1293,10 +1556,18 @@ static void init_parameter(void) { fprintf(stderr, "Steamroller\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1309,10 +1580,18 @@ static void init_parameter(void) { fprintf(stderr, "Zen\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX 
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1326,11 +1605,18 @@ static void init_parameter(void) { fprintf(stderr, "NANO\n"); #endif +#if defined (BUILD_SINGLE) || defined(BUILD_COMPLEX) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; - +#endif #ifdef EXPRECISION @@ -1340,41 +1626,55 @@ static void init_parameter(void) { #endif - +#ifdef BUILD_COMPLEX #ifdef CGEMM3M_DEFAULT_P TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P; #else TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p; #endif +#endif +#ifdef BUILD_COMPLEX16 #ifdef ZGEMM3M_DEFAULT_P TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P; #else TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p; #endif +#endif #ifdef EXPRECISION TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p; #endif - +#ifdef BUILD_SINGLE TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M; +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M; +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M; +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M; +#endif +#ifdef BUILD_COMPLEX #ifdef CGEMM3M_DEFAULT_UNROLL_M TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M; #else TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * 
SGEMM_DEFAULT_UNROLL_M; #endif +#endif +#ifdef BUILD_COMPLEX16 #ifdef ZGEMM3M_DEFAULT_UNROLL_M TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M; #else TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M; #endif +#endif #ifdef QUAD_PRECISION TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M; @@ -1386,15 +1686,19 @@ static void init_parameter(void) { fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p); #endif +#ifdef BUILD_SINGLE TABLE_NAME.sgemm_r = (((BUFFER_SIZE - ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15); +#endif +#ifdef BUILD_DOUBLE TABLE_NAME.dgemm_r = (((BUFFER_SIZE - ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15); +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_r = (((BUFFER_SIZE - @@ -1403,26 +1707,33 @@ static void init_parameter(void) { ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15); #endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm_r = (((BUFFER_SIZE - ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15); +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm_r = (((BUFFER_SIZE - ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15); +#endif +#ifdef BUILD_COMPLEX TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE - ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15); +#endif +#ifdef BUILD_COMPLEX16 TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE - 
((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15); - +#endif