From cbafa8114fdc75e6b1a6fa0cdbc6b29d5e0d009d Mon Sep 17 00:00:00 2001
From: Rohit Goswami
Date: Sun, 26 May 2024 23:02:27 +0000
Subject: [PATCH] ENH: Add more L3 symbols

---
 kernel/meson.build | 152 ++++++++++++++++++++++++++++++++++++++++++++-
 meson.build        |   4 +-
 2 files changed, 154 insertions(+), 2 deletions(-)

diff --git a/kernel/meson.build b/kernel/meson.build
index 21e63c361..65a9a6b06 100644
--- a/kernel/meson.build
+++ b/kernel/meson.build
@@ -448,10 +448,160 @@ base_kops = [
                   'addl': ['-DCONJ', '-DNC']},
           '_RC': {'dir': 'generic', 'kernel': 'ztrmmkernel_2x2.c',
                   'addl': ['-DCONJ', '-DCN']},
-        }
+        },
       },
     },
   },
+  { 'base': '?trsm_kernel',
+    'modes': {
+      's': {
+        'exts': {
+          '_LN': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
+                  'addl': ['-DLN', '-DUPPER', '-UCONJ']},
+          '_LT': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
+                  'addl': ['-DLT', '-UUPPER', '-UCONJ']},
+          '_RN': {'dir': 'generic', 'kernel': 'trsm_kernel_RN.c',
+                  'addl': ['-DRN', '-DUPPER', '-UCONJ']},
+          '_RT': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
+                  'addl': ['-DRT', '-UUPPER', '-UCONJ']},
+        },
+      },
+      'd': {
+        'exts': {
+          '_LN': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
+                  'addl': ['-DLN', '-DUPPER', '-UCONJ']},
+          '_LT': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
+                  'addl': ['-DLT', '-UUPPER', '-UCONJ']},
+          '_RN': {'dir': 'generic', 'kernel': 'trsm_kernel_RN.c',
+                  'addl': ['-DRN', '-DUPPER', '-UCONJ']},
+          '_RT': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
+                  'addl': ['-DRT', '-UUPPER', '-UCONJ']},
+        },
+      },
+      'c': {
+        'exts': {
+          '_LN': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
+                  'addl': ['-DLN', '-DUPPER', '-UCONJ']},
+          '_LT': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
+                  'addl': ['-DLT', '-UUPPER', '-UCONJ']},
+          '_LR': {'dir': 'generic', 'kernel': 'trsm_kernel_LN.c',
+                  'addl': ['-DLN', '-DUPPER', '-DCONJ']},
+          '_LC': {'dir': 'generic', 'kernel': 'trsm_kernel_LT.c',
+                  'addl': ['-DLT', '-UUPPER', '-DCONJ']},
+          '_RN': {'dir': 'generic', 'kernel': 'trsm_kernel_RN.c',
+                  'addl': ['-DRN', '-DUPPER', '-UCONJ']},
+          '_RT': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
+                  'addl': ['-DRT', '-UUPPER', '-UCONJ']},
+          '_RR': {'dir': 'generic', 'kernel': 'trsm_kernel_RN.c',
+                  'addl': ['-DRN', '-DUPPER', '-DCONJ']},
+          '_RC': {'dir': 'generic', 'kernel': 'trsm_kernel_RT.c',
+                  'addl': ['-DRT', '-UUPPER', '-DCONJ']},
+        },
+      },
+    },
+  },
+  { 'base': '?gemm3m_kernel',
+    'modes': {
+      'c': {'exts': {'': {'dir': 'x86_64', 'kernel': 'zgemm3m_kernel_8x4_sse3.S',
+                          'addl': ['-DNN']}}},
+      'z': {'exts': {'': {'dir': 'x86_64', 'kernel': 'zgemm3m_kernel_8x4_sse3.S',
+                          'addl': ['-DNN']}}},
+    },
+  },
+  { 'base': '?trmm',
+    'modes': {
+      's': {'exts': {
+        # TODO(rg): The _8/_4 file suffixes below are hard-coded; the Makefile build uses $(SGEMM_UNROLL_M) to choose the size (presumably $(SGEMM_UNROLL_N) for the '_o*' variants -- confirm)
+        '_iunucopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_8.c',
+                      'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
+        '_iunncopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_8.c',
+                      'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
+        '_ilnucopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_8.c',
+                      'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
+        '_ilnncopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_8.c',
+                      'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
+        '_iutucopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_8.c',
+                      'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
+        '_iutncopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_8.c',
+                      'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
+        '_iltucopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_8.c',
+                      'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
+        '_iltncopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_8.c',
+                      'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
+        '_ounucopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_4.c',
+                      'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
+        '_ounncopy': {'dir': 'generic', 'kernel': 'trmm_uncopy_4.c',
+                      'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
+        '_olnucopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_4.c',
+                      'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
+        '_olnncopy': {'dir': 'generic', 'kernel': 'trmm_lncopy_4.c',
+                      'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
+        '_outucopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_4.c',
+                      'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
+        '_outncopy': {'dir': 'generic', 'kernel': 'trmm_utcopy_4.c',
+                      'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
+        '_oltucopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_4.c',
+                      'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
+        '_oltncopy': {'dir': 'generic', 'kernel': 'trmm_ltcopy_4.c',
+                      'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
+      }},
+    },
+  },
+  { 'base': '?trsm',
+    'modes': {
+      's': {'exts': {
+        # TODO(rg): The _8/_4 file suffixes below are hard-coded; the Makefile build uses $(SGEMM_UNROLL_M) to choose the size (presumably $(SGEMM_UNROLL_N) for the '_o*' variants -- confirm)
+        '_iunucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
+                      'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
+        '_iunncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_8.c',
+                      'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
+        '_ilnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
+                      'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
+        '_ilnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_8.c',
+                      'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
+        '_iutucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
+                      'addl': ['-UOUTER', '-ULOWER', '-DUNIT']},
+        '_iutncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_8.c',
+                      'addl': ['-UOUTER', '-ULOWER', '-UUNIT']},
+        '_iltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
+                      'addl': ['-UOUTER', '-DLOWER', '-DUNIT']},
+        '_iltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_8.c',
+                      'addl': ['-UOUTER', '-DLOWER', '-UUNIT']},
+        '_ounucopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
+                      'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
+        '_ounncopy': {'dir': 'generic', 'kernel': 'trsm_uncopy_4.c',
+                      'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
+        '_olnucopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
+                      'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
+        '_olnncopy': {'dir': 'generic', 'kernel': 'trsm_lncopy_4.c',
+                      'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
+        '_outucopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
+                      'addl': ['-DOUTER', '-ULOWER', '-DUNIT']},
+        '_outncopy': {'dir': 'generic', 'kernel': 'trsm_utcopy_4.c',
+                      'addl': ['-DOUTER', '-ULOWER', '-UUNIT']},
+        '_oltucopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
+                      'addl': ['-DOUTER', '-DLOWER', '-DUNIT']},
+        '_oltncopy': {'dir': 'generic', 'kernel': 'trsm_ltcopy_4.c',
+                      'addl': ['-DOUTER', '-DLOWER', '-UUNIT']},
+      }},
+    },
+  },
+  { 'base': '?symm',
+    'modes': {
+      's': {'exts': {
+        # TODO(rg): The _8/_4 file suffixes below are hard-coded; the Makefile build uses $(SGEMM_UNROLL_M) to choose the size (presumably $(SGEMM_UNROLL_N) for the '_o*' variants -- confirm)
+        '_iutcopy': {'dir': 'generic', 'kernel': 'symm_ucopy_8.c',
+                     'addl': ['-UOUTER', '-ULOWER']},
+        '_iltcopy': {'dir': 'generic', 'kernel': 'symm_lcopy_8.c',
+                     'addl': ['-UOUTER', '-DLOWER']},
+        '_outcopy': {'dir': 'generic', 'kernel': 'symm_ucopy_4.c',
+                     'addl': ['-DOUTER', '-ULOWER']},
+        '_oltcopy': {'dir': 'generic', 'kernel': 'symm_lcopy_4.c',
+                     'addl': ['-DOUTER', '-DLOWER']},
+      }},
+    },
+  },
+  # TODO(rg): iunucopy stuff (presumably the '_i*'/'_o*' copy kernels for modes other than 's', which is the only mode defined above -- confirm)
 ]
 
 kernel_confs = []
diff --git a/meson.build b/meson.build
index b49c60184..202edd164 100644
--- a/meson.build
+++ b/meson.build
@@ -270,7 +270,8 @@ ext_mappings = {
   '_L': {'def': ['LOWER']},
   '_LN': {'def': ['LEFT'], 'undef': ['TRANSA'],
           'except': ['?syrk', '?syrk_thread',
-                     '?syr2k', '?herk', '?herk_kernel']},
+                     '?syr2k', '?herk', '?herk_kernel',
+                     '?trsm_kernel']},
   # Handle HEMV and HEMVREV better
   '_V': {'def': ['HEMV', 'HEMVREV', 'XCONJ'], 'undef': ['LOWER', 'CONJ']},
   '_M': {'def': ['HEMV', 'HEMVREV', 'LOWER']},
@@ -428,6 +429,7 @@ symb_defs = {
   '?her2_thread': {'def': ['HER']},
   '?hpr_thread': {'def': ['HEMV']},
   '?trmm_kernel': {'def': ['TRMMKERNEL']},
+  '?trsm_kernel': {'def': ['TRSMKERNEL']},
   '?bgemm': {'def': ['HALF']},
   'cblas_?dotu_sub': {'def': ['CBLAS', 'FORCE_USE_STACK'], 'undef': ['CONJ']},
   'cblas_?dotc_sub': {'def': ['CBLAS', 'FORCE_USE_STACK', 'CONJ']},