From 802f0dbde153b166f533ab1660336d7832e5b616 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 3 Jan 2019 22:17:31 +0100 Subject: [PATCH] More fixes for cross-compiling ARM64 targets Fixed core naming for DYNAMIC_ARCH. Corrected GEMM_DEFAULT entries and added SYMV_P. Replaced outdated VULCAN define for ThunderX2T99 with ARMV8 to get basic definitions back. For issue #1908 --- cmake/prebuild.cmake | 45 ++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 757461008..a67c44bf5 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -87,13 +87,18 @@ endif () # Cannot run getarch on target if we are cross-compiling if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSSTORE")) # Write to config as getarch would + if (DEFINED TARGET_CORE) + set(TCORE ${TARGET_CORE}) + else() + set(TCORE ${CORE}) + endif() # TODO: Set up defines that getarch sets up based on every other target # Perhaps this should be inside a different file as it grows larger file(APPEND ${TARGET_CONF_TEMP} - "#define ${CORE}\n" - "#define CHAR_CORENAME \"${CORE}\"\n") - if ("${CORE}" STREQUAL "ARMV7") + "#define ${TCORE}\n" + "#define CHAR_CORENAME \"${TCORE}\"\n") + if ("${TCORE}" STREQUAL "ARMV7") file(APPEND ${TARGET_CONF_TEMP} "#define L1_DATA_SIZE\t65536\n" "#define L1_DATA_LINESIZE\t32\n" @@ -108,7 +113,7 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS set(SGEMM_UNROLL_N 4) set(DGEMM_UNROLL_M 4) set(DGEMM_UNROLL_N 4) - elseif ("${CORE}" STREQUAL "ARMV8") + elseif ("${TCORE}" STREQUAL "ARMV8") file(APPEND ${TARGET_CONF_TEMP} "#define L1_DATA_SIZE\t32768\n" "#define L1_DATA_LINESIZE\t64\n" @@ -118,9 +123,16 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS "#define DTB_SIZE\t4096\n" "#define L2_ASSOCIATIVE\t32\n" "#define ARMV8\n") - set(SGEMM_UNROLL_M 4) + set(SGEMM_UNROLL_M 16) set(SGEMM_UNROLL_N 4) - elseif ("${CORE}" STREQUAL "CORTEXA57" OR "${CORE}" STREQUAL "CORTEXA53") + set(DGEMM_UNROLL_M 8) + set(DGEMM_UNROLL_N 4) + set(CGEMM_UNROLL_M 8) + set(CGEMM_UNROLL_N 4) + set(ZGEMM_UNROLL_M 4) + set(ZGEMM_UNROLL_N 4) + set(SYMV_P 16) + elseif ("${TCORE}" STREQUAL "CORTEXA57" OR "${TCORE}" STREQUAL "CORTEXA53") file(APPEND ${TARGET_CONF_TEMP} "#define L1_CODE_SIZE\t32768\n" "#define L1_CODE_LINESIZE\t64\n" @@ -144,9 +156,10 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS set(DGEMM_UNROLL_N 4) set(CGEMM_UNROLL_M 8) set(CGEMM_UNROLL_N 4) - set(ZGEMM_UNROLL_M 8) + set(ZGEMM_UNROLL_M 4) set(ZGEMM_UNROLL_N 4) - elseif ("${CORE}" STREQUAL "CORTEXA72" OR "${CORE}" STREQUAL "CORTEXA73") + set(SYMV_P 16) + elseif ("${TCORE}" STREQUAL "CORTEXA72" OR "${TCORE}" STREQUAL "CORTEXA73") file(APPEND ${TARGET_CONF_TEMP} "#define L1_CODE_SIZE\t49152\n" "#define L1_CODE_LINESIZE\t64\n" @@ -170,9 +183,10 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS set(DGEMM_UNROLL_N 4) set(CGEMM_UNROLL_M 8) set(CGEMM_UNROLL_N 4) - set(ZGEMM_UNROLL_M 8) + set(ZGEMM_UNROLL_M 4) set(ZGEMM_UNROLL_N 4) - elseif ("${CORE}" STREQUAL "FALKOR") + set(SYMV_P 16) + elseif ("${TCORE}" STREQUAL "FALKOR") file(APPEND ${TARGET_CONF_TEMP} "#define L1_CODE_SIZE\t65536\n" "#define L1_CODE_LINESIZE\t64\n" @@ -196,9 +210,10 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS set(DGEMM_UNROLL_N 4) set(CGEMM_UNROLL_M 8) set(CGEMM_UNROLL_N 4) - set(ZGEMM_UNROLL_M 8) + set(ZGEMM_UNROLL_M 4) set(ZGEMM_UNROLL_N 4) - elseif ("${CORE}" STREQUAL "THUNDERX") + set(SYMV_P 16) + elseif ("${TCORE}" STREQUAL "THUNDERX") file(APPEND ${TARGET_CONF_TEMP} "#define L1_CODE_SIZE\t32768\n" "#define L1_CODE_LINESIZE\t64\n" @@ -224,7 +239,8 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS set(CGEMM_UNROLL_N 2) set(ZGEMM_UNROLL_M 2) set(ZGEMM_UNROLL_N 2) - elseif ("${CORE}" STREQUAL "THUNDERX2T99") + set(SYMV_P 16) + elseif ("${TCORE}" STREQUAL "THUNDERX2T99") file(APPEND ${TARGET_CONF_TEMP} "#define L1_CODE_SIZE\t32768\n" "#define L1_CODE_LINESIZE\t64\n" @@ -240,7 +256,7 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS "#define L3_ASSOCIATIVE\t32\n" "#define DTB_DEFAULT_ENTRIES\t64\n" "#define DTB_SIZE\t4096\n" - "#define VULCAN\n") + "#define ARMV8\n") set(SGEMM_UNROLL_M 16) set(SGEMM_UNROLL_N 4) set(DGEMM_UNROLL_M 8) @@ -249,6 +265,7 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS set(CGEMM_UNROLL_N 4) set(ZGEMM_UNROLL_M 4) set(ZGEMM_UNROLL_N 4) + set(SYMV_P 16) endif() # Or should this actually be NUM_CORES?