ENH: Add in the rest of the level2 symbols

This commit is contained in:
Rohit Goswami 2024-05-12 13:35:51 +00:00 committed by Mateusz Sokół
parent 2fe1f31161
commit 86d32c7a14
2 changed files with 573 additions and 36 deletions

View File

@ -12,40 +12,540 @@ driver_kops = [
},
{ 'base': '?gbmv_thread',
'sources': {
'gbmv_thread.c': {'mode': ['s', 'd', 'q',
'c', 'z', 'x'],
'exts': ['_n', '_t', '_r', '_c',
'_o', '_u', '_s', '_d']},
'gbmv_thread.c': {'mode': ['s', 'd'],#, 'q',
# 'c', 'z', 'x'],
# only _n and _t normally
'exts': ['_n', '_t',]}, # '_r', '_c',
# '_o', '_u', '_s', '_d']},
}
},
{ 'base': '?gemv_thread',
'sources': {
'gemv_thread.c': {'mode': ['s', 'd', 'q',
'c', 'z', 'x'],
'exts': ['_n', '_t', '_r', '_c',
'_o', '_u', '_s', '_d']},
}
},
{ 'base': '?ger_thread',
'sources': {
'ger_thread.c': {'mode': ['s', 'd', 'q'], 'exts': ['']},
'ger_thread.c': {'mode': ['c', 'z', 'x'],
'exts': ['_U', '_C',
'_V', '_D']},
}
},
{ 'base': '?symv_thread',
'sources': {
'symv_thread.c': {'mode': ['s', 'd', 'q',
'c', 'z', 'x'],
'exts': ['_U', '_L']},
}
},
{ 'base': '?hemv_thread',
'sources': {
'symv_thread.c': {'mode': ['c', 'z'],
'exts': ['_U', '_L',
'_V', '_M']},
},
},
# { 'base': '?gemv_thread',
# 'sources': {
# 'gemv_thread.c': {'mode': ['s', 'd', 'q',
# 'c', 'z', 'x'],
# 'exts': ['_n', '_t', '_r', '_c',
# '_o', '_u', '_s', '_d']},
# }
# },
# { 'base': '?ger_thread',
# 'sources': {
# 'ger_thread.c': {'mode': ['s', 'd', 'q'], 'exts': ['']},
# }
# },
# { 'base': '?ger_thread',
# 'sources': {
# 'ger_thread.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_C',
# '_V', '_D']},
# }
# },
# { 'base': '?symv_thread',
# 'sources': {
# 'symv_thread.c': {'mode': ['s', 'd', 'q',
# 'c', 'z', 'x'],
# 'exts': ['_U', '_L']},
# }
# },
# { 'base': '?hemv_thread',
# 'sources': {
# 'symv_thread.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L',
# '_V', '_M']},
# },
# },
# { 'base': '?syr_thread',
# 'sources': {
# 'syr_thread.c': {'mode': ['s', 'd', 'q',
# 'c', 'z', 'x'],
# 'exts': ['_U', '_L']},
# },
# },
# { 'base': '?her_thread',
# 'sources': {
# 'syr_thread.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L',
# '_V', '_M']},
# },
# },
# { 'base': '?syr2_thread',
# 'sources': {
# 'syr2_thread.c': {'mode': ['s', 'd', 'q',
# 'c', 'z', 'x'],
# 'exts': ['_U', '_L']},
# },
# },
# { 'base': '?her2_thread',
# 'sources': {
# 'syr2_thread.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L',
# '_V', '_M']},
# },
# },
# { 'base': '?hbmv',
# 'sources': {
# 'zhbmv_k.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L',
# '_V', '_M']},
# },
# },
# { 'base': '?hbmv_thread',
# 'sources': {
# 'sbmv_thread.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L',
# '_V', '_M']},
# },
# },
# { 'base': '?her',
# 'sources': {
# 'zher_k.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L',
# '_V', '_M']},
# },
# },
# { 'base': '?her2',
# 'sources': {
# 'zher2_k.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L',
# '_V', '_M']},
# },
# },
# { 'base': '?hpmv',
# 'sources': {
# 'zhpmv_k.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L',
# '_V', '_M']},
# },
# },
# { 'base': '?hpmv_thread',
# 'sources': {
# 'spmv_thread.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L',
# '_V', '_M']},
# },
# },
# { 'base': '?hpr',
# 'sources': {
# 'zhpr_k.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L',
# '_V', '_M']},
# },
# },
# { 'base': '?hpr_thread',
# 'sources': {
# 'spr_thread.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L',
# '_V', '_M']},
# },
# },
# { 'base': '?hpr2',
# 'sources': {
# 'zhpr2_k.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L',
# '_V', '_M']},
# },
# },
# { 'base': '?hpr2_thread',
# 'sources': {
# 'spr2_thread.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L',
# '_V', '_M']},
# },
# },
# { 'base': '?sbmv',
# 'sources': {
# 'sbmv_k.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_U', '_L']},
# 'zsbmv_k.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L']},
# }
# },
# { 'base': '?sbmv_thread',
# 'sources': {
# 'sbmv_thread.c': {'mode': ['s', 'd', 'q',
# 'c', 'z', 'x'],
# 'exts': ['_U', '_L']},
# }
# },
# { 'base': '?spmv',
# 'sources': {
# 'spmv_k.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_U', '_L']},
# 'zspmv_k.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L']},
# }
# },
# { 'base': '?spmv_thread',
# 'sources': {
# 'spmv_thread.c': {'mode': ['s', 'd', 'q',
# 'c', 'z', 'x'],
# 'exts': ['_U', '_L']},
# }
# },
# { 'base': '?spr',
# 'sources': {
# 'spr_k.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_U', '_L']},
# 'zspr_k.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L']},
# }
# },
# { 'base': '?spr_thread',
# 'sources': {
# 'spr_thread.c': {'mode': ['s', 'd', 'q',
# 'c', 'z', 'x'],
# 'exts': ['_U', '_L']},
# }
# },
# { 'base': '?spr2',
# 'sources': {
# 'spr2_k.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_U', '_L']},
# 'zspr2_k.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L']},
# }
# },
# { 'base': '?spr2_thread',
# 'sources': {
# 'spr2_thread.c': {'mode': ['s', 'd', 'q',
# 'c', 'z', 'x'],
# 'exts': ['_U', '_L']},
# }
# },
# { 'base': '?syr',
# 'sources': {
# 'syr_k.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_U', '_L']},
# 'zsyr_k.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L']},
# }
# },
# { 'base': '?syr2',
# 'sources': {
# 'syr2_k.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_U', '_L']},
# 'zsyr2_k.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_U', '_L']},
# }
# },
# { 'base': '?tbmv',
# 'sources': {
# 'tbmv_U.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN']},
# 'tbmv_L.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NLU', '_NLN',
# '_TUU', '_TUN']},
# 'ztbmv_U.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN',
# '_CLU', '_CLN',
# '_RUU', '_RUN']},
# 'ztbmv_L.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_RLU', '_RLN',
# '_NLU', '_NLN',
# '_TUU', '_TUN',
# '_CUU', '_CUN']},
# }
# },
# { 'base': '?tbmv_thread',
# 'sources': {
# 'tbmv_thread.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN',
# '_NLU', '_NLN',
# '_TUU', '_TUN']},
# }
# },
# { 'base': '?tbmv_thread',
# 'sources': {
# 'tbmv_thread.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN',
# '_RLU', '_RLN',
# '_CLU', '_CLN',
# '_NLU', '_NLN',
# '_TUU', '_TUN',
# '_RUU', '_RUN',
# '_CUU', '_CUN']},
# }
# },
# { 'base': '?tbsv',
# 'sources': {
# 'tbsv_U.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN']},
# 'tbsv_L.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NLU', '_NLN',
# '_TUU', '_TUN']},
# 'ztbsv_U.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN',
# '_CLU', '_CLN',
# '_RUU', '_RUN']},
# 'ztbsv_L.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_RLU', '_RLN',
# '_NLU', '_NLN',
# '_TUU', '_TUN',
# '_CUU', '_CUN']},
# }
# },
# { 'base': '?tpmv',
# 'sources': {
# 'tpmv_U.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN']},
# 'tpmv_L.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NLU', '_NLN',
# '_TUU', '_TUN']},
# 'ztpmv_U.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN',
# '_CLU', '_CLN',
# '_RUU', '_RUN']},
# 'ztpmv_L.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_RLU', '_RLN',
# '_NLU', '_NLN',
# '_TUU', '_TUN',
# '_CUU', '_CUN']},
# }
# },
# { 'base': '?tpmv_thread',
# 'sources': {
# 'tpmv_thread.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN',
# '_NLU', '_NLN',
# '_TUU', '_TUN']},
# }
# },
# { 'base': '?tpmv_thread',
# 'sources': {
# 'tpmv_thread.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN',
# '_RLU', '_RLN',
# '_CLU', '_CLN',
# '_NLU', '_NLN',
# '_TUU', '_TUN',
# '_RUU', '_RUN',
# '_CUU', '_CUN']},
# }
# },
# { 'base': '?tpsv',
# 'sources': {
# 'tpsv_U.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN']},
# 'tpsv_L.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NLU', '_NLN',
# '_TUU', '_TUN']},
# 'ztpsv_U.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN',
# '_CLU', '_CLN',
# '_RUU', '_RUN']},
# 'ztpsv_L.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_RLU', '_RLN',
# '_NLU', '_NLN',
# '_TUU', '_TUN',
# '_CUU', '_CUN']},
# }
# },
# { 'base': '?trmv',
# 'sources': {
# 'trmv_U.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN']},
# 'trmv_L.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NLU', '_NLN',
# '_TUU', '_TUN']},
# 'ztrmv_U.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN',
# '_CLU', '_CLN',
# '_RUU', '_RUN']},
# 'ztrmv_L.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_RLU', '_RLN',
# '_NLU', '_NLN',
# '_TUU', '_TUN',
# '_CUU', '_CUN']},
# }
# },
# { 'base': '?trmv_thread',
# 'sources': {
# 'trmv_thread.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN',
# '_NLU', '_NLN',
# '_TUU', '_TUN']},
# }
# },
# { 'base': '?trmv_thread',
# 'sources': {
# 'trmv_thread.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN',
# '_RLU', '_RLN',
# '_CLU', '_CLN',
# '_NLU', '_NLN',
# '_TUU', '_TUN',
# '_RUU', '_RUN',
# '_CUU', '_CUN']},
# }
# },
# { 'base': '?trsv',
# 'sources': {
# 'trsv_U.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN']},
# 'trsv_L.c': {'mode': ['s', 'd', 'q'],
# 'exts': ['_NLU', '_NLN',
# '_TUU', '_TUN']},
# 'ztrsv_U.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_NUU', '_NUN',
# '_TLU', '_TLN',
# '_CLU', '_CLN',
# '_RUU', '_RUN']},
# 'ztrsv_L.c': {'mode': ['c', 'z', 'x'],
# 'exts': ['_RLU', '_RLN',
# '_NLU', '_NLN',
# '_TUU', '_TUN',
# '_CUU', '_CUN']},
# }
# },
# # TODO(rg): Add the bfloat conditionals from Makefile:3709
]
# Flatten driver_kops into one build configuration per generated symbol.
# Every entry appended to kernel_confs is a dict with:
#   'name'   - base with '?' replaced by the mode letter, followed by ext
#   'src'    - the source file name (key of the entry's 'sources' dict)
#   'c_args' - _cargs, then routine flags, precision flags, extension flags
#              and finally the naming macros, in exactly that order
kernel_confs = []
foreach _kop : driver_kops
  base = _kop['base']
  sources = _kop['sources']

  # Routine-level flags: only present when symb_defs has an entry for base
  _ckop_args = []
  if symb_defs.has_key(base)
    symb_base = symb_defs[base]
    foreach _d : symb_base.get('def', [])
      _ckop_args += ['-D' + _d]
    endforeach
    foreach _u : symb_base.get('undef', [])
      _ckop_args += ['-U' + _u]
    endforeach
  endif

  foreach fname, details : sources
    modes = details['mode']
    exts = details['exts']

    foreach mode : modes
      # Precision-level flags are appended after the routine-level ones
      prec_mode = precision_mappings[mode]
      __cargs = _cargs + _ckop_args
      foreach _d : prec_mode.get('def', [])
        __cargs += ['-D' + _d]
      endforeach
      foreach _u : prec_mode.get('undef', [])
        __cargs += ['-U' + _u]
      endforeach

      foreach ext : exts
        _ext_cargs = []
        if ext_mappings.has_key(ext)
          # Mode-independent extension: direct lookup
          extmap = ext_mappings[ext]
          foreach _d : extmap.get('def', [])
            _ext_cargs += ['-D' + _d]
          endforeach
          foreach _u : extmap.get('undef', [])
            _ext_cargs += ['-U' + _u]
          endforeach
        else
          # Mode-dependent extension: take the first ext_mappings_l2 row whose
          # 'ext' equals ext and whose 'for' list contains the current mode.
          # If no row matches, no extension flags are added.
          foreach ext_map : ext_mappings_l2
            if ext_map['ext'] != ext or mode not in ext_map['for']
              continue
            endif
            foreach _d : ext_map.get('def', [])
              _ext_cargs += ['-D' + _d]
            endforeach
            foreach _u : ext_map.get('undef', [])
              _ext_cargs += ['-U' + _u]
            endforeach
            break
          endforeach
        endif

        # Optional per-source extra flags go after the extension flags
        _ext_cargs += details.get('addl', [])

        # Symbol name and the naming macros derived from it
        sym_name = base.replace('?', mode) + ext
        sym_underscored = f'@sym_name@_'
        _ext_cargs += [
          f'-DASMNAME=@sym_name@',
          f'-DASMFNAME=@sym_underscored@',
          f'-DNAME=@sym_underscored@',
          f'-DCNAME=@sym_name@',
          f'-DCHAR_NAME="@sym_underscored@"',
          f'-DCHAR_CNAME="@sym_name@"',
        ]

        kernel_confs += [{
          'c_args': __cargs + _ext_cargs,
          'name': sym_name,
          'src': fname,
        }]
      endforeach
    endforeach
  endforeach
endforeach
# Build one small static library per configuration: the same source file is
# compiled once for every (mode, ext) combination, each time with its own
# c_args, so every configuration needs its own target.
_kern_libs = []
foreach conf : kernel_confs
  # Configure-time log of the symbol name and of the whole configuration
  message(conf['name'])
  message(conf)
  _kern_lib = static_library(
    conf['name'],
    conf['src'],
    include_directories: _inc,
    c_args: conf['c_args'],
  )
  _kern_libs += [_kern_lib]
endforeach

# Gather every per-symbol library into the single '_kern' archive;
# link_whole pulls in all of their objects.
_kern = static_library(
  '_kern',
  link_whole: _kern_libs,
)

View File

@ -184,6 +184,15 @@ add_project_arguments(simd_cargs, language: 'c')
# Common symbol related options
symnames = [
  'ASMNAME',
  'ASMFNAME',
  'NAME',
  'CNAME',
  'CHAR_NAME',
  'CHAR_CNAME',
]

# TODO(rg): Maybe make these conditional..
# SMP_SERVER is evidently necessary for the driver/level2 sources
_cargs += ['-DSMP_SERVER']
# BUILD_* macros, currently all switched on unconditionally (see TODO above)
_cargs += [
  '-DBUILD_SINGLE=1',
  '-DBUILD_DOUBLE=1',
  '-DBUILD_COMPLEX=1',
  '-DBUILD_COMPLEX16=1',
]
# Other common options, move later
# Undefine to help prevent clashes
foreach symb : symnames
@ -260,12 +269,12 @@ ext_mappings = {
'_k': {},
'_U': {'undef': ['LOWER', 'CONJ', 'XCONJ']},
'_C': {'def': ['CONJ'], 'undef': ['XCONJ']},
'_V': {'def': ['XCONJ'], 'undef': ['CONJ']},
# '_V': {'def': ['XCONJ'], 'undef': ['CONJ']},
'_D': {'def': ['CONJ', 'XCONJ']},
'_L': {'def': ['LOWER']},
'_LN': {'def': ['LEFT'], 'undef': ['TRANSA']},
# Handle HEMV and HEMVREV better
'_V': {'def': ['HEMV', 'HEMVREV'], 'undef': ['LOWER']},
'_V': {'def': ['HEMV', 'HEMVREV', 'XCONJ'], 'undef': ['LOWER', 'CONJ']},
'_M': {'def': ['HEMV', 'HEMVREV', 'LOWER']},
'_n': {'undef': ['TRANS', 'TRANSA', 'CONJ', 'XCONJ']},
'_t': {'def': ['TRANS', 'TRANSA'], 'undef': ['CONJ', 'XCONJ']},
@ -277,6 +286,34 @@ ext_mappings = {
'_d': {'def': ['TRANS', 'TRANSA', 'CONJ', 'XCONJ']},
}
# Mode-dependent extension flags for the triangular level-2 drivers.
#
# Each row has:
#   'ext'   - the symbol suffix the row applies to
#   'for'   - the mode letters the row is valid for
#   'def'   - macros to pass as -D (optional)
#   'undef' - macros to pass as -U (optional)
#
# Consumers are expected to take the FIRST row whose 'ext' matches and whose
# 'for' contains the current mode, so every (ext, mode) pair must appear at
# most once. Real modes toggle TRANSA on/off; complex modes pass TRANSA=<n>
# with 1 for N, 2 for T, 3 for R and 4 for C.
ext_mappings_l2 = [
  # Real modes
  {'ext': '_NUU', 'def': ['UNIT'], 'undef': ['TRANSA'], 'for': ['s', 'd']},
  {'ext': '_NUN', 'undef': ['TRANSA', 'UNIT'], 'for': ['s', 'd']},
  {'ext': '_TLU', 'def': ['UNIT', 'TRANSA'], 'for': ['s', 'd']},
  {'ext': '_TLN', 'def': ['TRANSA'], 'undef': ['UNIT'], 'for': ['s', 'd']},
  {'ext': '_NLU', 'def': ['UNIT'], 'undef': ['TRANSA'], 'for': ['s', 'd']},
  {'ext': '_NLN', 'undef': ['TRANSA', 'UNIT'], 'for': ['s', 'd']},
  {'ext': '_TUU', 'def': ['UNIT', 'TRANSA'], 'for': ['s', 'd']},
  {'ext': '_TUN', 'def': ['TRANSA'], 'undef': ['UNIT'], 'for': ['s', 'd']},
  # Complex modes
  {'ext': '_NUU', 'def': ['UNIT', 'TRANSA=1'], 'for': ['c', 'x', 'z']},
  {'ext': '_NUN', 'def': ['TRANSA=1'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']},
  {'ext': '_TLU', 'def': ['UNIT', 'TRANSA=2'], 'for': ['c', 'x', 'z']},
  {'ext': '_TLN', 'def': ['TRANSA=2'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']},
  {'ext': '_RLU', 'def': ['UNIT', 'TRANSA=3'], 'for': ['c', 'x', 'z']},
  {'ext': '_RLN', 'def': ['TRANSA=3'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']},
  {'ext': '_CLU', 'def': ['UNIT', 'TRANSA=4'], 'for': ['c', 'x', 'z']},
  {'ext': '_CLN', 'def': ['TRANSA=4'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']},
  {'ext': '_NLU', 'def': ['UNIT', 'TRANSA=1'], 'for': ['c', 'x', 'z']},
  {'ext': '_NLN', 'def': ['TRANSA=1'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']},
  # These two rows were tagged for ['s', 'd'], which made them unreachable
  # (the real-mode rows above match first) and left the complex modes without
  # any _TUU/_TUN mapping. TRANSA=<n> is the complex form, so they belong to
  # the complex modes.
  {'ext': '_TUU', 'def': ['UNIT', 'TRANSA=2'], 'for': ['c', 'x', 'z']},
  {'ext': '_TUN', 'def': ['TRANSA=2'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']},
  {'ext': '_RUU', 'def': ['UNIT', 'TRANSA=3'], 'for': ['c', 'x', 'z']},
  {'ext': '_RUN', 'def': ['TRANSA=3'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']},
  {'ext': '_CUU', 'def': ['UNIT', 'TRANSA=4'], 'for': ['c', 'x', 'z']},
  {'ext': '_CUN', 'def': ['TRANSA=4'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']},
]
symb_defs = {
'?amax': {'def': ['USE_ABS'], 'undef': ['USE_MIN']},
'?amin': {'def': ['USE_ABS', 'USE_MIN']},