BLD: Add L3 driver symbols

Bringing us up to 2247 symbols.
This commit is contained in:
Rohit Goswami 2024-05-20 01:45:21 +00:00 committed by Mateusz Sokół
parent e5564ec450
commit 854aecce82
3 changed files with 270 additions and 91 deletions

View File

@ -158,6 +158,173 @@ driver_kops = [
'_LN', '_LT']}, '_LN', '_LT']},
} }
}, },
{ 'base': '?hemm',
'sources': {
'zhemm_k.c': {'mode': ['c', 'z'], # 'x'],
# TODO(rg): Do we need ../../param.h ?
# See Makefile:1612
'exts': ['_LU', '_LL',
'_RU', '_RL']},
}
},
{ 'base': '?hemm_thread',
'sources': {
'zhemm_k.c': {'mode': ['c', 'z'], # 'x'],
'addl': ['-DTHREADED_LEVEL3'],
'exts': ['_LU', '_LL',
'_RU', '_RL']},
}
},
{ 'base': '?herk',
'sources': {
'zherk_k.c': {'mode': ['c', 'z'], # 'x'],
'addl': ['-DHERK'],
# TODO(rg): Do we need ../../common.h ?
# See Makefile:1684
'exts': ['_UN', '_UC',
'_LN', '_LC']},
}
},
{ 'base': '?herk_kernel',
'sources': {
'zherk_kernel.c': {'mode': ['c', 'z'], # 'x'],
'addl': ['-DHERK'],
'exts': ['_UN', '_UC',
'_LN', '_LC']},
}
},
{ 'base': '?herk_thread',
'sources': {
'zherk_k.c': {'mode': ['c', 'z'], # 'x'],
'addl': ['-DHERK', '-DTHREADED_LEVEL3'],
'exts': ['_UN', '_UC',
'_LN', '_LC']},
}
},
{ 'base': '?her2k',
'sources': {
'zher2k_k.c': {'mode': ['c', 'z'], # 'x'],
'addl': ['-DHER2K'],
# TODO(rg): Do we need ../../common.h ?
# See Makefile:1793
'exts': ['_UN', '_UC',
'_LN', '_LC']},
}
},
{ 'base': '?her2k_kernel',
'sources': {
'zher2k_kernel.c': {'mode': ['c', 'z'], # 'x'],
# TODO(rg): Do we need ../../common.h ?
# See Makefile:1793
'exts': ['_UN', '_UC',
'_LN', '_LC']},
}
},
{ 'base': '?gemm3m',
'sources': {
'gemm3m.c': {'mode': ['c', 'z',],# 'x'],
'srcs': ['level3.c'],
'exts': ['_nn', '_nt',
'_nr', '_nc',
'_tn', '_tt',
'_tr', '_tc',
'_rn', '_rt',
'_rr', '_rc',
'_cn', '_ct',
'_cr', '_cc']},
}
},
# { 'base': '?gemmf',
# 'sources': {
# # TODO(rg): This is in the Makefile:4401 but the file isn't there.
# 'zgemmf.c': {'mode': ['c', 'z',],# 'x'],
# 'srcs': ['level3.c'],
# 'exts': ['']},
# }
# },
{ 'base': '?gemm3m_thread',
'sources': {
'gemm3m.c': {'mode': ['c', 'z',],# 'x'],
'addl': ['-DTHREADED_LEVEL3'],
'srcs': ['level3.c'],
'exts': ['_nn', '_nt',
'_nr', '_nc',
'_tn', '_tt',
'_tr', '_tc',
'_rn', '_rt',
'_rr', '_rc',
'_cn', '_ct',
'_cr', '_cc']},
}
},
{ 'base': '?symm3m',
'sources': {
'symm3m_k.c': {'mode': ['c', 'z'], # 'x'],
'exts': ['_LU', '_LL',
'_RU', '_RL']},
}
},
{ 'base': '?symm3m_thread',
'sources': {
'symm3m_k.c': {'mode': ['c', 'z'], # 'x'],
'addl': ['-DTHREADED_LEVEL3'],
'exts': ['_LU', '_LL',
'_RU', '_RL']},
}
},
{ 'base': '?hemm3m',
'sources': {
'hemm3m_k.c': {'mode': ['c', 'z'], # 'x'],
'exts': ['_LU', '_LL',
'_RU', '_RL']},
}
},
{ 'base': '?hemm3m_thread',
'sources': {
'hemm3m_k.c': {'mode': ['c', 'z'], # 'x'],
'addl': ['-DTHREADED_LEVEL3'],
'exts': ['_LU', '_LL',
'_RU', '_RL']},
}
},
{ 'base': '?trsm',
'sources': {
'trsm_L.c': {'mode': ['s', 'd'],# 'q'],
'exts': ['_LNUU', '_LNUN',
'_LNLU', '_LNLN',
'_LTUU', '_LTUN',
'_LTLU', '_LTLN']},
'trsm_R.c': {'mode': ['s', 'd'],# 'q'],
'exts': ['_RNUU', '_RNUN',
'_RNLU', '_RNLN',
'_RTUU', '_RTUN',
'_RTLU', '_RTLN']},
}
},
{ 'base': '?trsm',
'sources': {
'trsm_L.c': {'mode': ['c', 'z'],# 'x'],
'exts': ['_LNUU', '_LNUN',
'_LNLU', '_LNLN',
'_LTUU', '_LTUN',
'_LTLU', '_LTLN',
'_LRUU', '_LRUN',
'_LRLU', '_LRLN',
'_LCUU', '_LCUN',
'_LCLU', '_LCLN',
]},
'trsm_R.c': {'mode': ['c', 'z'],# 'x'],
'exts': ['_RNUU', '_RNUN',
'_RNLU', '_RNLN',
'_RTUU', '_RTUN',
'_RTLU', '_RTLN',
'_RRUU', '_RRUN',
'_RRLU', '_RRLN',
'_RCUU', '_RCUN',
'_RCLU', '_RCLN',
]},
}
},
] ]
# Initialize kernel configurations list # Initialize kernel configurations list

View File

@ -349,6 +349,7 @@ _blas_roots = [
}, },
{ 'base': '?herk', '_types': ['c', 'z', 'x'], { 'base': '?herk', '_types': ['c', 'z', 'x'],
'fname': 'syrk.c', 'fname': 'syrk.c',
'addl': ['-DHEMM'],
'cblas': true, 'cblas': true,
}, },
{ 'base': '?her2k', '_types': ['c', 'z', 'x'], { 'base': '?her2k', '_types': ['c', 'z', 'x'],
@ -556,102 +557,106 @@ _blas_roots = [
_interface_libs = [] _interface_libs = []
foreach conf : _blas_roots foreach conf : _blas_roots
foreach type : conf['_types'] foreach type : conf['_types']
if 'q' in type or 'x' in type if 'q' in type or 'x' in type
# TODO: Figure out when to build these # TODO: Figure out when to build these
# These are the XDOUBLE symbols # These are the XDOUBLE symbols
continue continue
endif endif
# Seed with common args # Seed with common args
compiler_args = _cargs + interface_args compiler_args = _cargs + interface_args
# Generate the symbol flags # Generate the symbol flags
base = conf['base'] base = conf['base']
if symb_defs.has_key(base) if symb_defs.has_key(base)
symb_base = symb_defs[base] symb_base = symb_defs[base]
if symb_base.has_key('def') if symb_base.has_key('def')
foreach _d : symb_base['def'] foreach _d : symb_base['def']
compiler_args += ('-D' + _d) compiler_args += ('-D' + _d)
endforeach
endif
if symb_base.has_key('undef')
foreach _u : symb_base['undef']
compiler_args += ('-U' + _u)
endforeach
endif
endif
# Set the type arguments
if precision_mappings.get(type).has_key('def')
foreach d : precision_mappings[type]['def']
compiler_args += ['-D' + d]
endforeach endforeach
endif endif
if precision_mappings.get(type).has_key('undef') if symb_base.has_key('undef')
foreach u : precision_mappings[type]['undef'] foreach _u : symb_base['undef']
compiler_args += ['-U' + u] compiler_args += ('-U' + _u)
endforeach endforeach
endif endif
endif
# Set the type arguments
if precision_mappings.get(type).has_key('def')
foreach d : precision_mappings[type]['def']
compiler_args += ['-D' + d]
endforeach
endif
if precision_mappings.get(type).has_key('undef')
foreach u : precision_mappings[type]['undef']
compiler_args += ['-U' + u]
endforeach
endif
# Construct the actual symbol names, and mangled symbols if conf.has_key('addl')
# TODO: This might be conditional on other options compiler_args += conf['addl']
sym_name = conf['base'].replace('?', type) endif
sym_underscored = f'@sym_name@_'
if conf.get('cblas', false) # Construct the actual symbol names, and mangled symbols
cblas_sym_name = 'cblas_' + sym_name # TODO: This might be conditional on other options
cblas_sym_underscored = f'@cblas_sym_name@_' sym_name = conf['base'].replace('?', type)
sym_underscored = f'@sym_name@_'
if conf.get('cblas', false)
cblas_sym_name = 'cblas_' + sym_name
cblas_sym_underscored = f'@cblas_sym_name@_'
endif
# Construct conditionals
if conf.has_key('def')
foreach d : conf['def']
compiler_args += ['-D' + d]
endforeach
foreach u : conf['undef']
compiler_args += ['-U' + u]
endforeach
endif
# Make mangled symbols
# TODO: This might be conditional on other options
# Create the static library for each symbol
lib = static_library(
sym_name,
sources: conf['fname'],
include_directories: _inc,
c_args: compiler_args + [
f'-DASMNAME=@sym_name@',
f'-DASMFNAME=@sym_underscored@',
f'-DNAME=@sym_underscored@',
f'-DCNAME=@sym_name@',
f'-DCHAR_NAME="@sym_underscored@"',
f'-DCHAR_CNAME="@sym_name@"'
]
)
_interface_libs += lib
# If it's a CBLAS symbol, also create that
if conf.get('cblas', false)
if 'q' in type or 'x' in type
# There are no cblas_q symbols
# TODO: Handle edge cases around dz zd sc
continue
endif endif
cblas_lib = static_library(
# Construct conditionals cblas_sym_name,
if conf.has_key('def')
foreach d : conf['def']
compiler_args += ['-D' + d]
endforeach
foreach u : conf['undef']
compiler_args += ['-U' + u]
endforeach
endif
# Make mangled symbols
# TODO: This might be conditional on other options
# Create the static library for each symbol
lib = static_library(
sym_name,
sources: conf['fname'], sources: conf['fname'],
include_directories: _inc, include_directories: _inc,
c_args: compiler_args + [ c_args: compiler_args + [
f'-DASMNAME=@sym_name@', '-DCBLAS',
f'-DASMFNAME=@sym_underscored@', f'-DASMNAME=@cblas_sym_name@',
f'-DNAME=@sym_underscored@', f'-DASMFNAME=@cblas_sym_underscored@',
f'-DCNAME=@sym_name@', f'-DNAME=@cblas_sym_underscored@',
f'-DCHAR_NAME="@sym_underscored@"', f'-DCNAME=@cblas_sym_name@',
f'-DCHAR_CNAME="@sym_name@"' f'-DCHAR_NAME="@cblas_sym_underscored@"',
f'-DCHAR_CNAME="@cblas_sym_name@"'
] ]
) )
_interface_libs += lib _interface_libs += cblas_lib
endif
# If it's a CBLAS symbol, also create that
if conf.get('cblas', false)
if 'q' in type or 'x' in type
# There are no cblas_q symbols
# TODO: Handle edge cases around dz zd sc
continue
endif
cblas_lib = static_library(
cblas_sym_name,
sources: conf['fname'],
include_directories: _inc,
c_args: compiler_args + [
'-DCBLAS',
f'-DASMNAME=@cblas_sym_name@',
f'-DASMFNAME=@cblas_sym_underscored@',
f'-DNAME=@cblas_sym_underscored@',
f'-DCNAME=@cblas_sym_name@',
f'-DCHAR_NAME="@cblas_sym_underscored@"',
f'-DCHAR_CNAME="@cblas_sym_name@"'
]
)
_interface_libs += cblas_lib
endif
endforeach endforeach
endforeach endforeach

View File

@ -268,7 +268,9 @@ ext_mappings = {
# '_V': {'def': ['XCONJ'], 'undef': ['CONJ']}, # '_V': {'def': ['XCONJ'], 'undef': ['CONJ']},
'_D': {'def': ['CONJ', 'XCONJ']}, '_D': {'def': ['CONJ', 'XCONJ']},
'_L': {'def': ['LOWER']}, '_L': {'def': ['LOWER']},
'_LN': {'def': ['LEFT'], 'undef': ['TRANSA'], 'except': ['?syrk', '?syrk_thread', '?syr2k']}, '_LN': {'def': ['LEFT'], 'undef': ['TRANSA'],
'except': ['?syrk', '?syrk_thread',
'?syr2k', '?herk', '?herk_kernel']},
# Handle HEMV and HEMVREV better # Handle HEMV and HEMVREV better
'_V': {'def': ['HEMV', 'HEMVREV', 'XCONJ'], 'undef': ['LOWER', 'CONJ']}, '_V': {'def': ['HEMV', 'HEMVREV', 'XCONJ'], 'undef': ['LOWER', 'CONJ']},
'_M': {'def': ['HEMV', 'HEMVREV', 'LOWER']}, '_M': {'def': ['HEMV', 'HEMVREV', 'LOWER']},
@ -299,10 +301,13 @@ ext_mappings = {
# Level 3 symbols # Level 3 symbols
'_LU': {'def': ['NN'], 'undef': ['LOWER', 'RSIDE']}, '_LU': {'def': ['NN'], 'undef': ['LOWER', 'RSIDE']},
'_LL': {'def': ['LOWER', 'NN'], 'undef': ['RSIDE']}, '_LL': {'def': ['LOWER', 'NN'], 'undef': ['RSIDE']},
'_RU': {'def': ['RSIDE', 'NN'], 'undef': ['LOWER']}, '_RU': {'def': ['RSIDE', 'NN'], 'undef': ['LOWER'], 'except': ['?hemm', '?hemm_thread']},
'_RL': {'def': ['RSIDE', 'NN', 'LOWER']}, '_RL': {'def': ['RSIDE', 'NN', 'LOWER'], 'except': ['?hemm', '?hemm_thread']},
'_UN': {'undef': ['TRANS', 'LOWER'], 'except': ['?syrk']}, # TODO(rg): is CONJ OK for interface symbols?
'_UN': {'undef': ['TRANS', 'LOWER', 'CONJ'], 'except': ['?syrk']},
'_UT': {'def': ['TRANS'], 'undef': ['LOWER'], 'except': ['?syrk']}, '_UT': {'def': ['TRANS'], 'undef': ['LOWER'], 'except': ['?syrk']},
'_UC': {'def': ['TRANS', 'CONJ'], 'undef': ['LOWER']},
'_LC': {'def': ['LOWER', 'TRANS', 'CONJ']},
} }
ext_mappings_l2 = [ ext_mappings_l2 = [
@ -388,8 +393,10 @@ ext_mappings_l3 = [
# syrk # syrk
{'ext': '_UN', 'def': [], 'undef': ['LOWER', 'TRANS'], 'for': ['s', 'd', 'c', 'z']}, {'ext': '_UN', 'def': [], 'undef': ['LOWER', 'TRANS'], 'for': ['s', 'd', 'c', 'z']},
{'ext': '_UT', 'def': ['TRANS'], 'undef': ['LOWER'], 'for': ['s', 'd', 'c', 'z']}, {'ext': '_UT', 'def': ['TRANS'], 'undef': ['LOWER'], 'for': ['s', 'd', 'c', 'z']},
{'ext': '_LN', 'def': ['LOWER'], 'undef': ['TRANS'], 'for': ['s', 'd', 'c', 'z']}, {'ext': '_LN', 'def': ['LOWER'], 'undef': ['TRANS', 'CONJ'], 'for': ['s', 'd', 'c', 'z']},
{'ext': '_LT', 'def': ['TRANS', 'LOWER'], 'for': ['s', 'd', 'c', 'z']}, {'ext': '_LT', 'def': ['TRANS', 'LOWER'], 'for': ['s', 'd', 'c', 'z']},
{'ext': '_RU', 'def': ['RSIDE', 'NC'], 'undef': ['LOWER'], 'for': ['c', 'z']},
{'ext': '_RL', 'def': ['RSIDE', 'NC', 'LOWER'], 'for': ['c', 'z']},
] ]
# cc -c -O2 -DSMALL_MATRIX_OPT -DMAX_STACK_ALLOC=2048 -Wall -m64 -DF_INTERFACE_GFORT -fPIC -DSMP_SERVER -DNO_WARMUP -DMAX_CPU_NUMBER=12 -DMAX_PARALLEL_NUMBER=1 -DBUILD_SINGLE=1 -DBUILD_DOUBLE=1 -DBUILD_COMPLEX=1 -DBUILD_COMPLEX16=1 -DVERSION=\"0.3.26.dev\" -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mavx2 -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME -DASMNAME=strmm_RTUU -DASMFNAME=strmm_RTUU_ -DNAME=strmm_RTUU_ -DCNAME=strmm_RTUU -DCHAR_NAME=\"strmm_RTUU_\" -DCHAR_CNAME=\"strmm_RTUU\" -DNO_AFFINITY -I../.. -UDOUBLE -UCOMPLEX -UCOMPLEX -UDOUBLE -DTRANSA -DUPPER -DUNIT trmm_R.c -o strmm_RTUU.o # cc -c -O2 -DSMALL_MATRIX_OPT -DMAX_STACK_ALLOC=2048 -Wall -m64 -DF_INTERFACE_GFORT -fPIC -DSMP_SERVER -DNO_WARMUP -DMAX_CPU_NUMBER=12 -DMAX_PARALLEL_NUMBER=1 -DBUILD_SINGLE=1 -DBUILD_DOUBLE=1 -DBUILD_COMPLEX=1 -DBUILD_COMPLEX16=1 -DVERSION=\"0.3.26.dev\" -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mavx2 -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME -DASMNAME=strmm_RTUU -DASMFNAME=strmm_RTUU_ -DNAME=strmm_RTUU_ -DCNAME=strmm_RTUU -DCHAR_NAME=\"strmm_RTUU_\" -DCHAR_CNAME=\"strmm_RTUU\" -DNO_AFFINITY -I../.. -UDOUBLE -UCOMPLEX -UCOMPLEX -UDOUBLE -DTRANSA -DUPPER -DUNIT trmm_R.c -o strmm_RTUU.o
@ -409,7 +416,6 @@ symb_defs = {
'?geru': {'undef': ['CONJ']}, '?geru': {'undef': ['CONJ']},
'?gerc': {'def': ['CONJ']}, '?gerc': {'def': ['CONJ']},
'?hemm': {'def': ['HEMM']}, '?hemm': {'def': ['HEMM']},
'?herk': {'def': ['HEMM']},
'?her2k': {'def': ['HEMM']}, '?her2k': {'def': ['HEMM']},
'?gemm3m': {'def': ['GEMM3M']}, '?gemm3m': {'def': ['GEMM3M']},
'?symm3m': {'def': ['GEMM3M']}, '?symm3m': {'def': ['GEMM3M']},
@ -428,6 +434,7 @@ _inc = [include_directories('.')]
subdir('interface') subdir('interface')
subdir('driver/level2') subdir('driver/level2')
subdir('driver/level3') subdir('driver/level3')
# subdir('driver/others')
subdir('kernel') subdir('kernel')
_openblas = static_library('openblas', _openblas = static_library('openblas',