diff --git a/driver/level2/meson.build b/driver/level2/meson.build index f7bc3508e..0bff6d5c4 100644 --- a/driver/level2/meson.build +++ b/driver/level2/meson.build @@ -12,40 +12,540 @@ driver_kops = [ }, { 'base': '?gbmv_thread', 'sources': { - 'gbmv_thread.c': {'mode': ['s', 'd', 'q', - 'c', 'z', 'x'], - 'exts': ['_n', '_t', '_r', '_c', - '_o', '_u', '_s', '_d']}, + 'gbmv_thread.c': {'mode': ['s', 'd'],#, 'q', + # 'c', 'z', 'x'], + # only _n and _t normally + 'exts': ['_n', '_t',]}, # '_r', '_c', +# '_o', '_u', '_s', '_d']}, } }, - { 'base': '?gemv_thread', - 'sources': { - 'gemv_thread.c': {'mode': ['s', 'd', 'q', - 'c', 'z', 'x'], - 'exts': ['_n', '_t', '_r', '_c', - '_o', '_u', '_s', '_d']}, - } - }, - { 'base': '?ger_thread', - 'sources': { - 'ger_thread.c': {'mode': ['s', 'd', 'q'], 'exts': ['']}, - 'ger_thread.c': {'mode': ['c', 'z', 'x'], - 'exts': ['_U', '_C', - '_V', '_D']}, - } - }, - { 'base': '?symv_thread', - 'sources': { - 'symv_thread.c': {'mode': ['s', 'd', 'q', - 'c', 'z', 'x'], - 'exts': ['_U', '_L']}, - } - }, - { 'base': '?hemv_thread', - 'sources': { - 'symv_thread.c': {'mode': ['c', 'z'], - 'exts': ['_U', '_L', - '_V', '_M']}, - }, - }, + # { 'base': '?gemv_thread', + # 'sources': { + # 'gemv_thread.c': {'mode': ['s', 'd', 'q', + # 'c', 'z', 'x'], + # 'exts': ['_n', '_t', '_r', '_c', + # '_o', '_u', '_s', '_d']}, + # } + # }, + # { 'base': '?ger_thread', + # 'sources': { + # 'ger_thread.c': {'mode': ['s', 'd', 'q'], 'exts': ['']}, + # } + # }, + # { 'base': '?ger_thread', + # 'sources': { + # 'ger_thread.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_C', + # '_V', '_D']}, + # } + # }, + # { 'base': '?symv_thread', + # 'sources': { + # 'symv_thread.c': {'mode': ['s', 'd', 'q', + # 'c', 'z', 'x'], + # 'exts': ['_U', '_L']}, + # } + # }, + # { 'base': '?hemv_thread', + # 'sources': { + # 'symv_thread.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L', + # '_V', '_M']}, + # }, + # }, + # { 'base': '?syr_thread', + # 'sources': { + # 
'syr_thread.c': {'mode': ['s', 'd', 'q', + # 'c', 'z', 'x'], + # 'exts': ['_U', '_L']}, + # }, + # }, + # { 'base': '?her_thread', + # 'sources': { + # 'syr_thread.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L', + # '_V', '_M']}, + # }, + # }, + # { 'base': '?syr2_thread', + # 'sources': { + # 'syr2_thread.c': {'mode': ['s', 'd', 'q', + # 'c', 'z', 'x'], + # 'exts': ['_U', '_L']}, + # }, + # }, + # { 'base': '?her2_thread', + # 'sources': { + # 'syr2_thread.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L', + # '_V', '_M']}, + # }, + # }, + # { 'base': '?hbmv', + # 'sources': { + # 'zhbmv_k.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L', + # '_V', '_M']}, + # }, + # }, + # { 'base': '?hbmv_thread', + # 'sources': { + # 'sbmv_thread.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L', + # '_V', '_M']}, + # }, + # }, + # { 'base': '?her', + # 'sources': { + # 'zher_k.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L', + # '_V', '_M']}, + # }, + # }, + # { 'base': '?her2', + # 'sources': { + # 'zher2_k.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L', + # '_V', '_M']}, + # }, + # }, + # { 'base': '?hpmv', + # 'sources': { + # 'zhpmv_k.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L', + # '_V', '_M']}, + # }, + # }, + # { 'base': '?hpmv_thread', + # 'sources': { + # 'spmv_thread.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L', + # '_V', '_M']}, + # }, + # }, + # { 'base': '?hpr', + # 'sources': { + # 'zhpr_k.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L', + # '_V', '_M']}, + # }, + # }, + # { 'base': '?hpr_thread', + # 'sources': { + # 'spr_thread.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L', + # '_V', '_M']}, + # }, + # }, + # { 'base': '?hpr2', + # 'sources': { + # 'zhpr2_k.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L', + # '_V', '_M']}, + # }, + # }, + # { 'base': '?hpr2_thread', + # 'sources': { + # 'spr2_thread.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L', + # '_V', '_M']}, + # }, + # }, + # { 
'base': '?sbmv', + # 'sources': { + # 'sbmv_k.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_U', '_L']}, + # 'zsbmv_k.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L']}, + # } + # }, + # { 'base': '?sbmv_thread', + # 'sources': { + # 'sbmv_thread.c': {'mode': ['s', 'd', 'q', + # 'c', 'z', 'x'], + # 'exts': ['_U', '_L']}, + # } + # }, + # { 'base': '?spmv', + # 'sources': { + # 'spmv_k.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_U', '_L']}, + # 'zspmv_k.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L']}, + # } + # }, + # { 'base': '?spmv_thread', + # 'sources': { + # 'spmv_thread.c': {'mode': ['s', 'd', 'q', + # 'c', 'z', 'x'], + # 'exts': ['_U', '_L']}, + # } + # }, + # { 'base': '?spr', + # 'sources': { + # 'spr_k.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_U', '_L']}, + # 'zspr_k.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L']}, + # } + # }, + # { 'base': '?spr_thread', + # 'sources': { + # 'spr_thread.c': {'mode': ['s', 'd', 'q', + # 'c', 'z', 'x'], + # 'exts': ['_U', '_L']}, + # } + # }, + # { 'base': '?spr2', + # 'sources': { + # 'spr2_k.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_U', '_L']}, + # 'zspr2_k.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L']}, + # } + # }, + # { 'base': '?spr2_thread', + # 'sources': { + # 'spr2_thread.c': {'mode': ['s', 'd', 'q', + # 'c', 'z', 'x'], + # 'exts': ['_U', '_L']}, + # } + # }, + # { 'base': '?syr', + # 'sources': { + # 'syr_k.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_U', '_L']}, + # 'zsyr_k.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L']}, + # } + # }, + # { 'base': '?syr2', + # 'sources': { + # 'syr2_k.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_U', '_L']}, + # 'zsyr2_k.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_U', '_L']}, + # } + # }, + # { 'base': '?tbmv', + # 'sources': { + # 'tbmv_U.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN']}, + # 'tbmv_L.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NLU', '_NLN', + # '_TUU', '_TUN']}, + # 'ztbmv_U.c': 
{'mode': ['c', 'z', 'x'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN', + # '_CLU', '_CLN', + # '_RUU', '_RUN']}, + # 'ztbmv_L.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_RLU', '_RLN', + # '_NLU', '_NLN', + # '_TUU', '_TUN', + # '_CUU', '_CUN']}, + # } + # }, + # { 'base': '?tbmv_thread', + # 'sources': { + # 'tbmv_thread.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN', + # '_NLU', '_NLN', + # '_TUU', '_TUN']}, + # } + # }, + # { 'base': '?tbmv_thread', + # 'sources': { + # 'tbmv_thread.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN', + # '_RLU', '_RLN', + # '_CLU', '_CLN', + # '_NLU', '_NLN', + # '_TUU', '_TUN', + # '_RUU', '_RUN', + # '_CUU', '_CUN']}, + # } + # }, + # { 'base': '?tbsv', + # 'sources': { + # 'tbsv_U.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN']}, + # 'tbsv_L.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NLU', '_NLN', + # '_TUU', '_TUN']}, + # 'ztbsv_U.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN', + # '_CLU', '_CLN', + # '_RUU', '_RUN']}, + # 'ztbsv_L.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_RLU', '_RLN', + # '_NLU', '_NLN', + # '_TUU', '_TUN', + # '_CUU', '_CUN']}, + # } + # }, + # { 'base': '?tpmv', + # 'sources': { + # 'tpmv_U.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN']}, + # 'tpmv_L.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NLU', '_NLN', + # '_TUU', '_TUN']}, + # 'ztpmv_U.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN', + # '_CLU', '_CLN', + # '_RUU', '_RUN']}, + # 'ztpmv_L.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_RLU', '_RLN', + # '_NLU', '_NLN', + # '_TUU', '_TUN', + # '_CUU', '_CUN']}, + # } + # }, + # { 'base': '?tpmv_thread', + # 'sources': { + # 'tpmv_thread.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN', + # '_NLU', '_NLN', + # '_TUU', '_TUN']}, + # } + # }, + # { 'base': '?tpmv_thread', + # 'sources': { 
+ # 'tpmv_thread.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN', + # '_RLU', '_RLN', + # '_CLU', '_CLN', + # '_NLU', '_NLN', + # '_TUU', '_TUN', + # '_RUU', '_RUN', + # '_CUU', '_CUN']}, + # } + # }, + # { 'base': '?tpsv', + # 'sources': { + # 'tpsv_U.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN']}, + # 'tpsv_L.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NLU', '_NLN', + # '_TUU', '_TUN']}, + # 'ztpsv_U.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN', + # '_CLU', '_CLN', + # '_RUU', '_RUN']}, + # 'ztpsv_L.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_RLU', '_RLN', + # '_NLU', '_NLN', + # '_TUU', '_TUN', + # '_CUU', '_CUN']}, + # } + # }, + # { 'base': '?trmv', + # 'sources': { + # 'trmv_U.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN']}, + # 'trmv_L.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NLU', '_NLN', + # '_TUU', '_TUN']}, + # 'ztrmv_U.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN', + # '_CLU', '_CLN', + # '_RUU', '_RUN']}, + # 'ztrmv_L.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_RLU', '_RLN', + # '_NLU', '_NLN', + # '_TUU', '_TUN', + # '_CUU', '_CUN']}, + # } + # }, + # { 'base': '?trmv_thread', + # 'sources': { + # 'trmv_thread.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN', + # '_NLU', '_NLN', + # '_TUU', '_TUN']}, + # } + # }, + # { 'base': '?trmv_thread', + # 'sources': { + # 'trmv_thread.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN', + # '_RLU', '_RLN', + # '_CLU', '_CLN', + # '_NLU', '_NLN', + # '_TUU', '_TUN', + # '_RUU', '_RUN', + # '_CUU', '_CUN']}, + # } + # }, + # { 'base': '?trsv', + # 'sources': { + # 'trsv_U.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN']}, + # 'trsv_L.c': {'mode': ['s', 'd', 'q'], + # 'exts': ['_NLU', '_NLN', + # '_TUU', '_TUN']}, + # 'ztrsv_U.c': {'mode': ['c', 'z', 'x'], + # 
'exts': ['_NUU', '_NUN', + # '_TLU', '_TLN', + # '_CLU', '_CLN', + # '_RUU', '_RUN']}, + # 'ztrsv_L.c': {'mode': ['c', 'z', 'x'], + # 'exts': ['_RLU', '_RLN', + # '_NLU', '_NLN', + # '_TUU', '_TUN', + # '_CUU', '_CUN']}, + # } + # }, + # # TODO(rg): Add the bfloat conditionals from Makefile:3709 ] + +# Accumulates one dict ('c_args', 'name', 'src') per generated kernel symbol +kernel_confs = [] + +# Each driver_kops entry supplies a symbol 'base' pattern and its 'sources' dict +foreach _kop : driver_kops + base = _kop['base'] + sources = _kop['sources'] + + # Per-operation -D/-U flags from symb_defs, if the base has an entry there + _ckop_args = [] + if symb_defs.has_key(base) + symb_base = symb_defs[base] + if symb_base.has_key('def') + foreach _d : symb_base['def'] + _ckop_args += ['-D' + _d] + endforeach + endif + if symb_base.has_key('undef') + foreach _u : symb_base['undef'] + _ckop_args += ['-U' + _u] + endforeach + endif + endif + + # Each source file lists the precision modes and the suffixes (exts) to build + foreach fname, details : sources + modes = details['mode'] + exts = details['exts'] + + # One set of base compiler flags per precision mode + foreach mode : modes + # Start from the common _cargs plus the per-operation flags + __cargs = _cargs + _ckop_args + prec_mode = precision_mappings[mode] + + # Append the -D/-U flags that precision_mappings lists for this mode + if prec_mode.has_key('def') + foreach _d : prec_mode['def'] + __cargs += ['-D' + _d] + endforeach + endif + if prec_mode.has_key('undef') + foreach _u : prec_mode['undef'] + __cargs += ['-U' + _u] + endforeach + endif + + # One kernel configuration per suffix + foreach ext : exts + _ext_cargs = [] + + # Suffixes that are keys of ext_mappings take their flags from there + if ext_mappings.has_key(ext) + extmap = ext_mappings[ext] + if extmap.has_key('def') + foreach _d : extmap['def'] + _ext_cargs += ['-D' + _d] + endforeach + endif + if extmap.has_key('undef') + foreach _u : extmap['undef'] + _ext_cargs += ['-U' + _u] + endforeach + endif + else + # Otherwise use the first ext_mappings_l2 row matching both ext and mode; if no row matches, no def/undef flags are added + foreach ext_map : ext_mappings_l2 + if ext_map['ext'] == ext and mode in ext_map['for'] + if ext_map.has_key('def') + foreach _d : ext_map['def'] + _ext_cargs += ['-D' + _d] + endforeach
+ endif + if ext_map.has_key('undef') + foreach _u : ext_map['undef'] + _ext_cargs += ['-U' + _u] + endforeach + endif + break + endif + endforeach + endif + + # The source is the file name exactly as keyed in 'sources' + src = fname + + # Optional extra flags from the source entry's 'addl' key + if sources[fname].has_key('addl') + _ext_cargs += sources[fname]['addl'] + endif + + # Symbol name: '?' in base replaced by the mode letter, then the suffix appended + sym_name = base.replace('?', mode) + ext + sym_underscored = f'@sym_name@_' + + # Name macros: the plain symbol and its trailing-underscore variant + _ext_cargs += [ + f'-DASMNAME=@sym_name@', + f'-DASMFNAME=@sym_underscored@', + f'-DNAME=@sym_underscored@', + f'-DCNAME=@sym_name@', + f'-DCHAR_NAME="@sym_underscored@"', + f'-DCHAR_CNAME="@sym_name@"', + ] + + # Record the mode flags plus suffix flags, the symbol name and the source + current_def = { + 'c_args': __cargs + _ext_cargs, + 'name': sym_name, + 'src': src, + } + kernel_confs += [current_def] + endforeach + endforeach + endforeach +endforeach + +# Build one static library per configuration; each name and full conf is printed via message() +_kern_libs = [] +foreach conf : kernel_confs + message(conf['name']) + message(conf) + _kern_libs += [static_library( + conf['name'], + conf['src'], + include_directories: _inc, + c_args: conf['c_args'], + )] +endforeach + +# Bundle all per-kernel libraries into '_kern' using link_whole +_kern = static_library('_kern', + link_whole: _kern_libs) diff --git a/meson.build b/meson.build index 6df57a9ad..d0d48bf04 100644 --- a/meson.build +++ b/meson.build @@ -184,6 +184,15 @@ add_project_arguments(simd_cargs, language: 'c') # Common symbol related options symnames = ['ASMNAME', 'ASMFNAME', 'NAME', 'CNAME', 'CHAR_NAME', 'CHAR_CNAME'] +# TODO(rg): Maybe make these conditional..
+_cargs += [ + '-DSMP_SERVER', # This is evidently necessary for the driver/level2 + '-DBUILD_SINGLE=1', + '-DBUILD_DOUBLE=1', + '-DBUILD_COMPLEX=1', + '-DBUILD_COMPLEX16=1', +] + # Other common options, move later # Undefine to help prevent clashes foreach symb : symnames @@ -260,12 +269,12 @@ ext_mappings = { '_k': {}, '_U': {'undef': ['LOWER', 'CONJ', 'XCONJ']}, '_C': {'def': ['CONJ'], 'undef': ['XCONJ']}, - '_V': {'def': ['XCONJ'], 'undef': ['CONJ']}, + # '_V': {'def': ['XCONJ'], 'undef': ['CONJ']}, '_D': {'def': ['CONJ', 'XCONJ']}, '_L': {'def': ['LOWER']}, '_LN': {'def': ['LEFT'], 'undef': ['TRANSA']}, # Handle HEMV and HEMVREV better - '_V': {'def': ['HEMV', 'HEMVREV'], 'undef': ['LOWER']}, + '_V': {'def': ['HEMV', 'HEMVREV', 'XCONJ'], 'undef': ['LOWER', 'CONJ']}, '_M': {'def': ['HEMV', 'HEMVREV', 'LOWER']}, '_n': {'undef': ['TRANS', 'TRANSA', 'CONJ', 'XCONJ']}, '_t': {'def': ['TRANS', 'TRANSA'], 'undef': ['CONJ', 'XCONJ']}, @@ -277,6 +286,36 @@ ext_mappings = { '_d': {'def': ['TRANS', 'TRANSA', 'CONJ', 'XCONJ']}, } +# Suffix flags for the three-letter level-2 suffixes; a row applies only to the precision modes listed in 'for'. +# Rows for s/d define or undefine plain TRANSA; rows for c/x/z set TRANSA=1..4 for the N, T, R and C suffixes. +ext_mappings_l2 = [ + {'ext': '_NUU', 'def': ['UNIT'], 'undef': ['TRANSA'], 'for': ['s', 'd']}, + {'ext': '_NUN', 'undef': ['TRANSA', 'UNIT'], 'for': ['s', 'd']}, + {'ext': '_TLU', 'def': ['UNIT', 'TRANSA'], 'for': ['s', 'd']}, + {'ext': '_TLN', 'def': ['TRANSA'], 'undef': ['UNIT'], 'for': ['s', 'd']}, + {'ext': '_NLU', 'def': ['UNIT'], 'undef': ['TRANSA'], 'for': ['s', 'd']}, + {'ext': '_NLN', 'undef': ['TRANSA', 'UNIT'], 'for': ['s', 'd']}, + {'ext': '_TUU', 'def': ['UNIT', 'TRANSA'], 'for': ['s', 'd']}, + {'ext': '_TUN', 'def': ['TRANSA'], 'undef': ['UNIT'], 'for': ['s', 'd']}, + {'ext': '_NUU', 'def': ['UNIT', 'TRANSA=1'], 'for': ['c', 'x', 'z']}, + {'ext': '_NUN', 'def': ['TRANSA=1'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']}, + {'ext': '_TLU', 'def': ['UNIT', 'TRANSA=2'], 'for': ['c', 'x', 'z']}, + {'ext': '_TLN', 'def': ['TRANSA=2'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']}, + {'ext': '_RLU', 'def': ['UNIT', 'TRANSA=3'], 'for': ['c',
'x', 'z']}, + {'ext': '_RLN', 'def': ['TRANSA=3'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']}, + {'ext': '_CLU', 'def': ['UNIT', 'TRANSA=4'], 'for': ['c', 'x', 'z']}, + {'ext': '_CLN', 'def': ['TRANSA=4'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']}, + {'ext': '_NLU', 'def': ['UNIT', 'TRANSA=1'], 'for': ['c', 'x', 'z']}, + {'ext': '_NLN', 'def': ['TRANSA=1'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']}, + {'ext': '_TUU', 'def': ['UNIT', 'TRANSA=2'], 'for': ['c', 'x', 'z']}, + {'ext': '_TUN', 'def': ['TRANSA=2'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']}, + {'ext': '_RUU', 'def': ['UNIT', 'TRANSA=3'], 'for': ['c', 'x', 'z']}, + {'ext': '_RUN', 'def': ['TRANSA=3'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']}, + {'ext': '_CUU', 'def': ['UNIT', 'TRANSA=4'], 'for': ['c', 'x', 'z']}, + {'ext': '_CUN', 'def': ['TRANSA=4'], 'undef': ['UNIT'], 'for': ['c', 'x', 'z']} +] + + symb_defs = { '?amax': {'def': ['USE_ABS'], 'undef': ['USE_MIN']}, '?amin': {'def': ['USE_ABS', 'USE_MIN']},