From 854aecce824e22dd9cbb5cfd42999eba79062f7f Mon Sep 17 00:00:00 2001 From: Rohit Goswami Date: Mon, 20 May 2024 01:45:21 +0000 Subject: [PATCH] BLD: Add L3 driver symbols Bringing us up to 2247 symbols.. The interface loop now reads a root's optional 'undef' list with conf.get('undef', []) so that a root declaring only 'def' no longer aborts configuration on a missing key. --- driver/level3/meson.build | 167 ++++++++++++++++++++++++++++++++++++ interface/meson.build | 175 ++++++++++++++++++++------------------ meson.build | 19 +++-- 3 files changed, 270 insertions(+), 91 deletions(-) diff --git a/driver/level3/meson.build b/driver/level3/meson.build index a66f2489f..faa20b54d 100644 --- a/driver/level3/meson.build +++ b/driver/level3/meson.build @@ -158,6 +158,173 @@ driver_kops = [ '_LN', '_LT']}, } }, + { 'base': '?hemm', + 'sources': { + 'zhemm_k.c': {'mode': ['c', 'z'], # 'x'], + # TODO(rg): Do we need ../../param.h ? + # See Makefile:1612 + 'exts': ['_LU', '_LL', + '_RU', '_RL']}, + } + }, + { 'base': '?hemm_thread', + 'sources': { + 'zhemm_k.c': {'mode': ['c', 'z'], # 'x'], + 'addl': ['-DTHREADED_LEVEL3'], + 'exts': ['_LU', '_LL', + '_RU', '_RL']}, + } + }, + { 'base': '?herk', + 'sources': { + 'zherk_k.c': {'mode': ['c', 'z'], # 'x'], + 'addl': ['-DHERK'], + # TODO(rg): Do we need ../../common.h ? + # See Makefile:1684 + 'exts': ['_UN', '_UC', + '_LN', '_LC']}, + } + }, + { 'base': '?herk_kernel', + 'sources': { + 'zherk_kernel.c': {'mode': ['c', 'z'], # 'x'], + 'addl': ['-DHERK'], + 'exts': ['_UN', '_UC', + '_LN', '_LC']}, + } + }, + { 'base': '?herk_thread', + 'sources': { + 'zherk_k.c': {'mode': ['c', 'z'], # 'x'], + 'addl': ['-DHERK', '-DTHREADED_LEVEL3'], + 'exts': ['_UN', '_UC', + '_LN', '_LC']}, + } + }, + { 'base': '?her2k', + 'sources': { + 'zher2k_k.c': {'mode': ['c', 'z'], # 'x'], + 'addl': ['-DHER2K'], + # TODO(rg): Do we need ../../common.h ? + # See Makefile:1793 + 'exts': ['_UN', '_UC', + '_LN', '_LC']}, + } + }, + { 'base': '?her2k_kernel', + 'sources': { + 'zher2k_kernel.c': {'mode': ['c', 'z'], # 'x'], + # TODO(rg): Do we need ../../common.h ? 
+ # See Makefile:1793 + 'exts': ['_UN', '_UC', + '_LN', '_LC']}, + } + }, + { 'base': '?gemm3m', + 'sources': { + 'gemm3m.c': {'mode': ['c', 'z',],# 'x'], + 'srcs': ['level3.c'], + 'exts': ['_nn', '_nt', + '_nr', '_nc', + '_tn', '_tt', + '_tr', '_tc', + '_rn', '_rt', + '_rr', '_rc', + '_cn', '_ct', + '_cr', '_cc']}, + } + }, + # { 'base': '?gemmf', + # 'sources': { + # # TODO(rg): This is in the Makefile:4401 but the file isn't there.. + # 'zgemmf.c': {'mode': ['c', 'z',],# 'x'], + # 'srcs': ['level3.c'], + # 'exts': ['']}, + # } + # }, + { 'base': '?gemm3m_thread', + 'sources': { + 'gemm3m.c': {'mode': ['c', 'z',],# 'x'], + 'addl': ['-DTHREADED_LEVEL3'], + 'srcs': ['level3.c'], + 'exts': ['_nn', '_nt', + '_nr', '_nc', + '_tn', '_tt', + '_tr', '_tc', + '_rn', '_rt', + '_rr', '_rc', + '_cn', '_ct', + '_cr', '_cc']}, + } + }, + { 'base': '?symm3m', + 'sources': { + 'symm3m_k.c': {'mode': ['c', 'z'], # 'x'], + 'exts': ['_LU', '_LL', + '_RU', '_RL']}, + } + }, + { 'base': '?symm3m_thread', + 'sources': { + 'symm3m_k.c': {'mode': ['c', 'z'], # 'x'], + 'addl': ['-DTHREADED_LEVEL3'], + 'exts': ['_LU', '_LL', + '_RU', '_RL']}, + } + }, + { 'base': '?hemm3m', + 'sources': { + 'hemm3m_k.c': {'mode': ['c', 'z'], # 'x'], + 'exts': ['_LU', '_LL', + '_RU', '_RL']}, + } + }, + { 'base': '?hemm3m_thread', + 'sources': { + 'hemm3m_k.c': {'mode': ['c', 'z'], # 'x'], + 'addl': ['-DTHREADED_LEVEL3'], + 'exts': ['_LU', '_LL', + '_RU', '_RL']}, + } + }, + { 'base': '?trsm', + 'sources': { + 'trsm_L.c': {'mode': ['s', 'd'],# 'q'], + 'exts': ['_LNUU', '_LNUN', + '_LNLU', '_LNLN', + '_LTUU', '_LTUN', + '_LTLU', '_LTLN']}, + 'trsm_R.c': {'mode': ['s', 'd'],# 'q'], + 'exts': ['_RNUU', '_RNUN', + '_RNLU', '_RNLN', + '_RTUU', '_RTUN', + '_RTLU', '_RTLN']}, + } + }, + { 'base': '?trsm', + 'sources': { + 'trsm_L.c': {'mode': ['c', 'z'],# 'x'], + 'exts': ['_LNUU', '_LNUN', + '_LNLU', '_LNLN', + '_LTUU', '_LTUN', + '_LTLU', '_LTLN', + '_LRUU', '_LRUN', + '_LRLU', '_LRLN', + '_LCUU', '_LCUN', 
+ '_LCLU', '_LCLN', + ]}, + 'trsm_R.c': {'mode': ['c', 'z'],# 'x'], + 'exts': ['_RNUU', '_RNUN', + '_RNLU', '_RNLN', + '_RTUU', '_RTUN', + '_RTLU', '_RTLN', + '_RRUU', '_RRUN', + '_RRLU', '_RRLN', + '_RCUU', '_RCUN', + '_RCLU', '_RCLN', + ]}, + } + }, ] # Initialize kernel configurations list diff --git a/interface/meson.build b/interface/meson.build index 18abcba12..866ab1e16 100644 --- a/interface/meson.build +++ b/interface/meson.build @@ -349,6 +349,7 @@ _blas_roots = [ }, { 'base': '?herk', '_types': ['c', 'z', 'x'], 'fname': 'syrk.c', + 'addl': ['-DHEMM'], 'cblas': true, }, { 'base': '?her2k', '_types': ['c', 'z', 'x'], @@ -556,102 +557,106 @@ _blas_roots = [ _interface_libs = [] foreach conf : _blas_roots - foreach type : conf['_types'] - if 'q' in type or 'x' in type - # TODO: Figure out when to build these - # These are the XDOUBLE symbols - continue - endif - # Seed with common args - compiler_args = _cargs + interface_args - # Generate the symbol flags - base = conf['base'] - if symb_defs.has_key(base) - symb_base = symb_defs[base] - if symb_base.has_key('def') - foreach _d : symb_base['def'] - compiler_args += ('-D' + _d) - endforeach - endif - if symb_base.has_key('undef') - foreach _u : symb_base['undef'] - compiler_args += ('-U' + _u) - endforeach - endif - endif - # Set the type arguments - if precision_mappings.get(type).has_key('def') - foreach d : precision_mappings[type]['def'] - compiler_args += ['-D' + d] + foreach type : conf['_types'] + if 'q' in type or 'x' in type + # TODO: Figure out when to build these + # These are the XDOUBLE symbols + continue + endif + # Seed with common args + compiler_args = _cargs + interface_args + # Generate the symbol flags + base = conf['base'] + if symb_defs.has_key(base) + symb_base = symb_defs[base] + if symb_base.has_key('def') + foreach _d : symb_base['def'] + compiler_args += ('-D' + _d) endforeach endif - if precision_mappings.get(type).has_key('undef') - foreach u : precision_mappings[type]['undef'] 
- compiler_args += ['-U' + u] + if symb_base.has_key('undef') + foreach _u : symb_base['undef'] + compiler_args += ('-U' + _u) endforeach endif + endif + # Set the type arguments + if precision_mappings.get(type).has_key('def') + foreach d : precision_mappings[type]['def'] + compiler_args += ['-D' + d] + endforeach + endif + if precision_mappings.get(type).has_key('undef') + foreach u : precision_mappings[type]['undef'] + compiler_args += ['-U' + u] + endforeach + endif - # Construct the actual symbol names, and mangled symbols - # TODO: This might be conditional on other options - sym_name = conf['base'].replace('?', type) - sym_underscored = f'@sym_name@_' - if conf.get('cblas', false) - cblas_sym_name = 'cblas_' + sym_name - cblas_sym_underscored = f'@cblas_sym_name@_' + if conf.has_key('addl') + compiler_args += conf['addl'] + endif + + # Construct the actual symbol names, and mangled symbols + # TODO: This might be conditional on other options + sym_name = conf['base'].replace('?', type) + sym_underscored = f'@sym_name@_' + if conf.get('cblas', false) + cblas_sym_name = 'cblas_' + sym_name + cblas_sym_underscored = f'@cblas_sym_name@_' + endif + + # Construct conditionals + if conf.has_key('def') + foreach d : conf['def'] + compiler_args += ['-D' + d] + endforeach + foreach u : conf.get('undef', []) + compiler_args += ['-U' + u] + endforeach + endif + + # Make mangled symbols + # TODO: This might be conditional on other options + + # Create the static library for each symbol + lib = static_library( + sym_name, + sources: conf['fname'], + include_directories: _inc, + c_args: compiler_args + [ + f'-DASMNAME=@sym_name@', + f'-DASMFNAME=@sym_underscored@', + f'-DNAME=@sym_underscored@', + f'-DCNAME=@sym_name@', + f'-DCHAR_NAME="@sym_underscored@"', + f'-DCHAR_CNAME="@sym_name@"' + ] + ) + _interface_libs += lib + + # If it's a CBLAS symbol, also create that + if conf.get('cblas', false) + if 'q' in type or 'x' in type + # There are no cblas_q symbols + # TODO: Handle 
edge 
cases around dz zd sc + continue endif - - # Construct conditionals - if conf.has_key('def') - foreach d : conf['def'] - compiler_args += ['-D' + d] - endforeach - foreach u : conf['undef'] - compiler_args += ['-U' + u] - endforeach - endif - - # Make mangled symbols - # TODO: This might be conditional on other options - - # Create the static library for each symbol - lib = static_library( - sym_name, + cblas_lib = static_library( + cblas_sym_name, sources: conf['fname'], include_directories: _inc, c_args: compiler_args + [ - f'-DASMNAME=@sym_name@', - f'-DASMFNAME=@sym_underscored@', - f'-DNAME=@sym_underscored@', - f'-DCNAME=@sym_name@', - f'-DCHAR_NAME="@sym_underscored@"', - f'-DCHAR_CNAME="@sym_name@"' + '-DCBLAS', + f'-DASMNAME=@cblas_sym_name@', + f'-DASMFNAME=@cblas_sym_underscored@', + f'-DNAME=@cblas_sym_underscored@', + f'-DCNAME=@cblas_sym_name@', + f'-DCHAR_NAME="@cblas_sym_underscored@"', + f'-DCHAR_CNAME="@cblas_sym_name@"' ] ) - _interface_libs += lib - - # If it's a CBLAS symbol, also create that - if conf.get('cblas', false) - if 'q' in type or 'x' in type - # There are no cblas_q symbols - # TODO: Handle edge cases around dz zd sc - continue - endif - cblas_lib = static_library( - cblas_sym_name, - sources: conf['fname'], - include_directories: _inc, - c_args: compiler_args + [ - '-DCBLAS', - f'-DASMNAME=@cblas_sym_name@', - f'-DASMFNAME=@cblas_sym_underscored@', - f'-DNAME=@cblas_sym_underscored@', - f'-DCNAME=@cblas_sym_name@', - f'-DCHAR_NAME="@cblas_sym_underscored@"', - f'-DCHAR_CNAME="@cblas_sym_name@"' - ] - ) - _interface_libs += cblas_lib - endif + _interface_libs += cblas_lib + endif endforeach endforeach diff --git a/meson.build b/meson.build index b4937460b..1cb68f068 100644 --- a/meson.build +++ b/meson.build @@ -268,7 +268,9 @@ ext_mappings = { # '_V': {'def': ['XCONJ'], 'undef': ['CONJ']}, '_D': {'def': ['CONJ', 'XCONJ']}, '_L': {'def': ['LOWER']}, - '_LN': {'def': ['LEFT'], 'undef': ['TRANSA'], 'except': ['?syrk', '?syrk_thread', 
'?syr2k']}, + '_LN': {'def': ['LEFT'], 'undef': ['TRANSA'], + 'except': ['?syrk', '?syrk_thread', + '?syr2k', '?herk', '?herk_kernel']}, # Handle HEMV and HEMVREV better '_V': {'def': ['HEMV', 'HEMVREV', 'XCONJ'], 'undef': ['LOWER', 'CONJ']}, '_M': {'def': ['HEMV', 'HEMVREV', 'LOWER']}, @@ -299,10 +301,13 @@ ext_mappings = { # Level 3 symbols '_LU': {'def': ['NN'], 'undef': ['LOWER', 'RSIDE']}, '_LL': {'def': ['LOWER', 'NN'], 'undef': ['RSIDE']}, - '_RU': {'def': ['RSIDE', 'NN'], 'undef': ['LOWER']}, - '_RL': {'def': ['RSIDE', 'NN', 'LOWER']}, - '_UN': {'undef': ['TRANS', 'LOWER'], 'except': ['?syrk']}, + '_RU': {'def': ['RSIDE', 'NN'], 'undef': ['LOWER'], 'except': ['?hemm', '?hemm_thread']}, + '_RL': {'def': ['RSIDE', 'NN', 'LOWER'], 'except': ['?hemm', '?hemm_thread']}, + # TODO(rg): is CONJ OK for interface symbols? + '_UN': {'undef': ['TRANS', 'LOWER', 'CONJ'], 'except': ['?syrk']}, '_UT': {'def': ['TRANS'], 'undef': ['LOWER'], 'except': ['?syrk']}, + '_UC': {'def': ['TRANS', 'CONJ'], 'undef': ['LOWER']}, + '_LC': {'def': ['LOWER', 'TRANS', 'CONJ']}, } ext_mappings_l2 = [ @@ -388,8 +393,10 @@ ext_mappings_l3 = [ # syrk {'ext': '_UN', 'def': [], 'undef': ['LOWER', 'TRANS'], 'for': ['s', 'd', 'c', 'z']}, {'ext': '_UT', 'def': ['TRANS'], 'undef': ['LOWER'], 'for': ['s', 'd', 'c', 'z']}, - {'ext': '_LN', 'def': ['LOWER'], 'undef': ['TRANS'], 'for': ['s', 'd', 'c', 'z']}, + {'ext': '_LN', 'def': ['LOWER'], 'undef': ['TRANS', 'CONJ'], 'for': ['s', 'd', 'c', 'z']}, {'ext': '_LT', 'def': ['TRANS', 'LOWER'], 'for': ['s', 'd', 'c', 'z']}, + {'ext': '_RU', 'def': ['RSIDE', 'NC'], 'undef': ['LOWER'], 'for': ['c', 'z']}, + {'ext': '_RL', 'def': ['RSIDE', 'NC', 'LOWER'], 'for': ['c', 'z']}, ] # cc -c -O2 -DSMALL_MATRIX_OPT -DMAX_STACK_ALLOC=2048 -Wall -m64 -DF_INTERFACE_GFORT -fPIC -DSMP_SERVER -DNO_WARMUP -DMAX_CPU_NUMBER=12 -DMAX_PARALLEL_NUMBER=1 -DBUILD_SINGLE=1 -DBUILD_DOUBLE=1 -DBUILD_COMPLEX=1 -DBUILD_COMPLEX16=1 -DVERSION=\"0.3.26.dev\" -msse3 -mssse3 -msse4.1 -mavx 
-mavx2 -mavx2 -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME -DASMNAME=strmm_RTUU -DASMFNAME=strmm_RTUU_ -DNAME=strmm_RTUU_ -DCNAME=strmm_RTUU -DCHAR_NAME=\"strmm_RTUU_\" -DCHAR_CNAME=\"strmm_RTUU\" -DNO_AFFINITY -I../.. -UDOUBLE -UCOMPLEX -UCOMPLEX -UDOUBLE -DTRANSA -DUPPER -DUNIT trmm_R.c -o strmm_RTUU.o @@ -409,7 +416,6 @@ symb_defs = { '?geru': {'undef': ['CONJ']}, '?gerc': {'def': ['CONJ']}, '?hemm': {'def': ['HEMM']}, - '?herk': {'def': ['HEMM']}, '?her2k': {'def': ['HEMM']}, '?gemm3m': {'def': ['GEMM3M']}, '?symm3m': {'def': ['GEMM3M']}, @@ -428,6 +434,7 @@ _inc = [include_directories('.')] subdir('interface') subdir('driver/level2') subdir('driver/level3') +# subdir('driver/others') subdir('kernel') _openblas = static_library('openblas',